Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 179
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 86
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 204
-rw-r--r--  lib/Analysis/Analysis.cpp | 4
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 340
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp | 4
-rw-r--r--  lib/Analysis/BlockFrequencyInfoImpl.cpp | 16
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 10
-rw-r--r--  lib/Analysis/CFG.cpp | 5
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 14
-rw-r--r--  lib/Analysis/CFLAndersAliasAnalysis.cpp | 20
-rw-r--r--  lib/Analysis/CFLGraph.h | 2
-rw-r--r--  lib/Analysis/CGSCCPassManager.cpp | 15
-rw-r--r--  lib/Analysis/CMakeLists.txt | 7
-rw-r--r--  lib/Analysis/CallGraph.cpp | 3
-rw-r--r--  lib/Analysis/CallGraphSCCPass.cpp | 51
-rw-r--r--  lib/Analysis/CaptureTracking.cpp | 50
-rw-r--r--  lib/Analysis/CmpInstAnalysis.cpp | 28
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 317
-rw-r--r--  lib/Analysis/DemandedBits.cpp | 195
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp | 11
-rw-r--r--  lib/Analysis/DivergenceAnalysis.cpp | 579
-rw-r--r--  lib/Analysis/EHPersonalities.cpp | 2
-rw-r--r--  lib/Analysis/GlobalsModRef.cpp | 40
-rw-r--r--  lib/Analysis/GuardUtils.cpp | 21
-rw-r--r--  lib/Analysis/IVDescriptors.cpp | 1089
-rw-r--r--  lib/Analysis/IndirectCallPromotionAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/InlineCost.cpp | 133
-rw-r--r--  lib/Analysis/InstructionPrecedenceTracking.cpp | 157
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 518
-rw-r--r--  lib/Analysis/IteratedDominanceFrontier.cpp | 21
-rw-r--r--  lib/Analysis/LazyCallGraph.cpp | 2
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 75
-rw-r--r--  lib/Analysis/LegacyDivergenceAnalysis.cpp | 391
-rw-r--r--  lib/Analysis/Lint.cpp | 4
-rw-r--r--  lib/Analysis/Loads.cpp | 6
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp | 60
-rw-r--r--  lib/Analysis/LoopAnalysisManager.cpp | 4
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 135
-rw-r--r--  lib/Analysis/LoopPass.cpp | 32
-rw-r--r--  lib/Analysis/MemDepPrinter.cpp | 5
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 100
-rw-r--r--  lib/Analysis/MemoryLocation.cpp | 96
-rw-r--r--  lib/Analysis/MemorySSA.cpp | 437
-rw-r--r--  lib/Analysis/MemorySSAUpdater.cpp | 544
-rw-r--r--  lib/Analysis/ModuleSummaryAnalysis.cpp | 137
-rw-r--r--  lib/Analysis/MustExecute.cpp | 255
-rw-r--r--  lib/Analysis/ObjCARCAliasAnalysis.cpp | 8
-rw-r--r--  lib/Analysis/ObjCARCInstKind.cpp | 152
-rw-r--r--  lib/Analysis/OrderedBasicBlock.cpp | 3
-rw-r--r--  lib/Analysis/OrderedInstructions.cpp | 51
-rw-r--r--  lib/Analysis/PhiValues.cpp | 17
-rw-r--r--  lib/Analysis/ProfileSummaryInfo.cpp | 45
-rw-r--r--  lib/Analysis/RegionPass.cpp | 1
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 480
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 14
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 2
-rw-r--r--  lib/Analysis/ScopedNoAliasAA.cpp | 33
-rw-r--r--  lib/Analysis/StackSafetyAnalysis.cpp | 673
-rw-r--r--  lib/Analysis/SyncDependenceAnalysis.cpp | 380
-rw-r--r--  lib/Analysis/SyntheticCountsUtils.cpp | 34
-rw-r--r--  lib/Analysis/TargetLibraryInfo.cpp | 34
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 119
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp | 31
-rw-r--r--  lib/Analysis/TypeMetadataUtils.cpp | 42
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 648
-rw-r--r--  lib/Analysis/VectorUtils.cpp | 530
67 files changed, 7708 insertions, 1995 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index a6585df949f8..3446aef39938 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -40,7 +40,6 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -118,11 +117,11 @@ bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
return false;
}
-ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getArgModRefInfo(CS, ArgIdx));
+ Result = intersectModRef(Result, AA->getArgModRefInfo(Call, ArgIdx));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -132,11 +131,11 @@ ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
return Result;
}
-ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
// We may have two calls.
- if (auto CS = ImmutableCallSite(I)) {
+ if (const auto *Call1 = dyn_cast<CallBase>(I)) {
// Check if the two calls modify the same memory.
- return getModRefInfo(CS, Call);
+ return getModRefInfo(Call1, Call2);
} else if (I->isFenceLike()) {
// If this is a fence, just return ModRef.
return ModRefInfo::ModRef;
@@ -146,19 +145,19 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
// is that if the call references what this instruction
// defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I);
- ModRefInfo MR = getModRefInfo(Call, DefLoc);
+ ModRefInfo MR = getModRefInfo(Call2, DefLoc);
if (isModOrRefSet(MR))
return setModAndRef(MR);
}
return ModRefInfo::NoModRef;
}
-ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(CS, Loc));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -167,7 +166,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
// Try to refine the mod-ref info further using other API entry points to the
// aggregate set of AA results.
- auto MRB = getModRefBehavior(CS);
+ auto MRB = getModRefBehavior(Call);
if (MRB == FMRB_DoesNotAccessMemory ||
MRB == FMRB_OnlyAccessesInaccessibleMem)
return ModRefInfo::NoModRef;
@@ -178,20 +177,19 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
Result = clearRef(Result);
if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) {
- bool DoesAlias = false;
bool IsMustAlias = true;
ModRefInfo AllArgsMask = ModRefInfo::NoModRef;
if (doesAccessArgPointees(MRB)) {
- for (auto AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) {
+ for (auto AI = Call->arg_begin(), AE = Call->arg_end(); AI != AE; ++AI) {
const Value *Arg = *AI;
if (!Arg->getType()->isPointerTy())
continue;
- unsigned ArgIdx = std::distance(CS.arg_begin(), AI);
- MemoryLocation ArgLoc = MemoryLocation::getForArgument(CS, ArgIdx, TLI);
+ unsigned ArgIdx = std::distance(Call->arg_begin(), AI);
+ MemoryLocation ArgLoc =
+ MemoryLocation::getForArgument(Call, ArgIdx, TLI);
AliasResult ArgAlias = alias(ArgLoc, Loc);
if (ArgAlias != NoAlias) {
- ModRefInfo ArgMask = getArgModRefInfo(CS, ArgIdx);
- DoesAlias = true;
+ ModRefInfo ArgMask = getArgModRefInfo(Call, ArgIdx);
AllArgsMask = unionModRef(AllArgsMask, ArgMask);
}
// Conservatively clear IsMustAlias unless only MustAlias is found.
@@ -199,7 +197,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
}
}
// Return NoModRef if no alias found with any argument.
- if (!DoesAlias)
+ if (isNoModRef(AllArgsMask))
return ModRefInfo::NoModRef;
// Logical & between other AA analyses and argument analysis.
Result = intersectModRef(Result, AllArgsMask);
@@ -215,12 +213,12 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
return Result;
}
-ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
ModRefInfo Result = ModRefInfo::ModRef;
for (const auto &AA : AAs) {
- Result = intersectModRef(Result, AA->getModRefInfo(CS1, CS2));
+ Result = intersectModRef(Result, AA->getModRefInfo(Call1, Call2));
// Early-exit the moment we reach the bottom of the lattice.
if (isNoModRef(Result))
@@ -230,59 +228,61 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
// Try to refine the mod-ref info further using other API entry points to the
// aggregate set of AA results.
- // If CS1 or CS2 are readnone, they don't interact.
- auto CS1B = getModRefBehavior(CS1);
- if (CS1B == FMRB_DoesNotAccessMemory)
+ // If Call1 or Call2 are readnone, they don't interact.
+ auto Call1B = getModRefBehavior(Call1);
+ if (Call1B == FMRB_DoesNotAccessMemory)
return ModRefInfo::NoModRef;
- auto CS2B = getModRefBehavior(CS2);
- if (CS2B == FMRB_DoesNotAccessMemory)
+ auto Call2B = getModRefBehavior(Call2);
+ if (Call2B == FMRB_DoesNotAccessMemory)
return ModRefInfo::NoModRef;
// If they both only read from memory, there is no dependence.
- if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
+ if (onlyReadsMemory(Call1B) && onlyReadsMemory(Call2B))
return ModRefInfo::NoModRef;
- // If CS1 only reads memory, the only dependence on CS2 can be
- // from CS1 reading memory written by CS2.
- if (onlyReadsMemory(CS1B))
+ // If Call1 only reads memory, the only dependence on Call2 can be
+ // from Call1 reading memory written by Call2.
+ if (onlyReadsMemory(Call1B))
Result = clearMod(Result);
- else if (doesNotReadMemory(CS1B))
+ else if (doesNotReadMemory(Call1B))
Result = clearRef(Result);
- // If CS2 only access memory through arguments, accumulate the mod/ref
- // information from CS1's references to the memory referenced by
- // CS2's arguments.
- if (onlyAccessesArgPointees(CS2B)) {
- if (!doesAccessArgPointees(CS2B))
+ // If Call2 only accesses memory through arguments, accumulate the mod/ref
+ // information from Call1's references to the memory referenced by
+ // Call2's arguments.
+ if (onlyAccessesArgPointees(Call2B)) {
+ if (!doesAccessArgPointees(Call2B))
return ModRefInfo::NoModRef;
ModRefInfo R = ModRefInfo::NoModRef;
bool IsMustAlias = true;
- for (auto I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ for (auto I = Call2->arg_begin(), E = Call2->arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I);
- auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI);
-
- // ArgModRefCS2 indicates what CS2 might do to CS2ArgLoc, and the
- // dependence of CS1 on that location is the inverse:
- // - If CS2 modifies location, dependence exists if CS1 reads or writes.
- // - If CS2 only reads location, dependence exists if CS1 writes.
- ModRefInfo ArgModRefCS2 = getArgModRefInfo(CS2, CS2ArgIdx);
+ unsigned Call2ArgIdx = std::distance(Call2->arg_begin(), I);
+ auto Call2ArgLoc =
+ MemoryLocation::getForArgument(Call2, Call2ArgIdx, TLI);
+
+ // ArgModRefC2 indicates what Call2 might do to Call2ArgLoc, and the
+ // dependence of Call1 on that location is the inverse:
+ // - If Call2 modifies location, dependence exists if Call1 reads or
+ // writes.
+ // - If Call2 only reads location, dependence exists if Call1 writes.
+ ModRefInfo ArgModRefC2 = getArgModRefInfo(Call2, Call2ArgIdx);
ModRefInfo ArgMask = ModRefInfo::NoModRef;
- if (isModSet(ArgModRefCS2))
+ if (isModSet(ArgModRefC2))
ArgMask = ModRefInfo::ModRef;
- else if (isRefSet(ArgModRefCS2))
+ else if (isRefSet(ArgModRefC2))
ArgMask = ModRefInfo::Mod;
- // ModRefCS1 indicates what CS1 might do to CS2ArgLoc, and we use
+ // ModRefC1 indicates what Call1 might do to Call2ArgLoc, and we use
// above ArgMask to update dependence info.
- ModRefInfo ModRefCS1 = getModRefInfo(CS1, CS2ArgLoc);
- ArgMask = intersectModRef(ArgMask, ModRefCS1);
+ ModRefInfo ModRefC1 = getModRefInfo(Call1, Call2ArgLoc);
+ ArgMask = intersectModRef(ArgMask, ModRefC1);
// Conservatively clear IsMustAlias unless only MustAlias is found.
- IsMustAlias &= isMustSet(ModRefCS1);
+ IsMustAlias &= isMustSet(ModRefC1);
R = intersectModRef(unionModRef(R, ArgMask), Result);
if (R == Result) {
@@ -300,31 +300,32 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
return IsMustAlias ? setMust(R) : clearMust(R);
}
- // If CS1 only accesses memory through arguments, check if CS2 references
- // any of the memory referenced by CS1's arguments. If not, return NoModRef.
- if (onlyAccessesArgPointees(CS1B)) {
- if (!doesAccessArgPointees(CS1B))
+ // If Call1 only accesses memory through arguments, check if Call2 references
+ // any of the memory referenced by Call1's arguments. If not, return NoModRef.
+ if (onlyAccessesArgPointees(Call1B)) {
+ if (!doesAccessArgPointees(Call1B))
return ModRefInfo::NoModRef;
ModRefInfo R = ModRefInfo::NoModRef;
bool IsMustAlias = true;
- for (auto I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
+ for (auto I = Call1->arg_begin(), E = Call1->arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I);
- auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI);
-
- // ArgModRefCS1 indicates what CS1 might do to CS1ArgLoc; if CS1 might
- // Mod CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If
- // CS1 might Ref, then we care only about a Mod by CS2.
- ModRefInfo ArgModRefCS1 = getArgModRefInfo(CS1, CS1ArgIdx);
- ModRefInfo ModRefCS2 = getModRefInfo(CS2, CS1ArgLoc);
- if ((isModSet(ArgModRefCS1) && isModOrRefSet(ModRefCS2)) ||
- (isRefSet(ArgModRefCS1) && isModSet(ModRefCS2)))
- R = intersectModRef(unionModRef(R, ArgModRefCS1), Result);
+ unsigned Call1ArgIdx = std::distance(Call1->arg_begin(), I);
+ auto Call1ArgLoc =
+ MemoryLocation::getForArgument(Call1, Call1ArgIdx, TLI);
+
+ // ArgModRefC1 indicates what Call1 might do to Call1ArgLoc; if Call1
+ // might Mod Call1ArgLoc, then we care about either a Mod or a Ref by
+ // Call2. If Call1 might Ref, then we care only about a Mod by Call2.
+ ModRefInfo ArgModRefC1 = getArgModRefInfo(Call1, Call1ArgIdx);
+ ModRefInfo ModRefC2 = getModRefInfo(Call2, Call1ArgLoc);
+ if ((isModSet(ArgModRefC1) && isModOrRefSet(ModRefC2)) ||
+ (isRefSet(ArgModRefC1) && isModSet(ModRefC2)))
+ R = intersectModRef(unionModRef(R, ArgModRefC1), Result);
// Conservatively clear IsMustAlias unless only MustAlias is found.
- IsMustAlias &= isMustSet(ModRefCS2);
+ IsMustAlias &= isMustSet(ModRefC2);
if (R == Result) {
// On early exit, not all args were checked, cannot set Must.
@@ -344,11 +345,11 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1,
return Result;
}
-FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) {
+FunctionModRefBehavior AAResults::getModRefBehavior(const CallBase *Call) {
FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior;
for (const auto &AA : AAs) {
- Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS));
+ Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(Call));
// Early-exit the moment we reach the bottom of the lattice.
if (Result == FMRB_DoesNotAccessMemory)
@@ -560,8 +561,8 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
isa<Constant>(Object))
return ModRefInfo::ModRef;
- ImmutableCallSite CS(I);
- if (!CS.getInstruction() || CS.getInstruction() == Object)
+ const auto *Call = dyn_cast<CallBase>(I);
+ if (!Call || Call == Object)
return ModRefInfo::ModRef;
if (PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
@@ -574,14 +575,14 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
ModRefInfo R = ModRefInfo::NoModRef;
bool IsMustAlias = true;
// Set flag only if no May found and all operands processed.
- for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
+ for (auto CI = Call->data_operands_begin(), CE = Call->data_operands_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture or byval pointer arguments. If this
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(ArgNo) &&
- ArgNo < CS.getNumArgOperands() && !CS.isByValArgument(ArgNo)))
+ (!Call->doesNotCapture(ArgNo) && ArgNo < Call->getNumArgOperands() &&
+ !Call->isByValArgument(ArgNo)))
continue;
AliasResult AR = alias(MemoryLocation(*CI), MemoryLocation(Object));
@@ -593,9 +594,9 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
IsMustAlias = false;
if (AR == NoAlias)
continue;
- if (CS.doesNotAccessMemory(ArgNo))
+ if (Call->doesNotAccessMemory(ArgNo))
continue;
- if (CS.onlyReadsMemory(ArgNo)) {
+ if (Call->onlyReadsMemory(ArgNo)) {
R = ModRefInfo::Ref;
continue;
}
@@ -642,28 +643,6 @@ AnalysisKey AAManager::Key;
namespace {
-/// A wrapper pass for external alias analyses. This just squirrels away the
-/// callback used to run any analyses and register their results.
-struct ExternalAAWrapperPass : ImmutablePass {
- using CallbackT = std::function<void(Pass &, Function &, AAResults &)>;
-
- CallbackT CB;
-
- static char ID;
-
- ExternalAAWrapperPass() : ImmutablePass(ID) {
- initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
-
- explicit ExternalAAWrapperPass(CallbackT CB)
- : ImmutablePass(ID), CB(std::move(CB)) {
- initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-};
} // end anonymous namespace
@@ -799,8 +778,8 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
}
bool llvm::isNoAliasCall(const Value *V) {
- if (auto CS = ImmutableCallSite(V))
- return CS.hasRetAttr(Attribute::NoAlias);
+ if (const auto *Call = dyn_cast<CallBase>(V))
+ return Call->hasRetAttr(Attribute::NoAlias);
return false;
}
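
The change to isNoAliasCall() above shows the replacement pattern this patch applies throughout the file: the ImmutableCallSite/CallSite wrappers are dropped in favor of a direct cast to CallBase, the common base class of CallInst and InvokeInst. A minimal sketch of the new idiom, using only the CallBase API visible in the hunks above (the helper name is illustrative, not from the patch):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h" // llvm::CallBase
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Before: if (auto CS = ImmutableCallSite(V)) return CS.hasRetAttr(...);
// After, mirroring the rewritten isNoAliasCall() above:
static bool returnsNoAliasPointer(const Value *V) {
  // dyn_cast<CallBase> succeeds for both call and invoke instructions.
  if (const auto *Call = dyn_cast<CallBase>(V))
    return Call->hasRetAttr(Attribute::NoAlias);
  return false;
}
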
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 764ae9160350..85dd4fe95b33 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -66,11 +66,10 @@ static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I,
}
}
-static inline void PrintModRefResults(const char *Msg, bool P, CallSite CSA,
- CallSite CSB, Module *M) {
+static inline void PrintModRefResults(const char *Msg, bool P, CallBase *CallA,
+ CallBase *CallB, Module *M) {
if (PrintAll || P) {
- errs() << " " << Msg << ": " << *CSA.getInstruction() << " <-> "
- << *CSB.getInstruction() << '\n';
+ errs() << " " << Msg << ": " << *CallA << " <-> " << *CallB << '\n';
}
}
@@ -98,7 +97,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
++FunctionCount;
SetVector<Value *> Pointers;
- SmallSetVector<CallSite, 16> CallSites;
+ SmallSetVector<CallBase *, 16> Calls;
SetVector<Value *> Loads;
SetVector<Value *> Stores;
@@ -114,16 +113,16 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
if (EvalAAMD && isa<StoreInst>(&*I))
Stores.insert(&*I);
Instruction &Inst = *I;
- if (auto CS = CallSite(&Inst)) {
- Value *Callee = CS.getCalledValue();
+ if (auto *Call = dyn_cast<CallBase>(&Inst)) {
+ Value *Callee = Call->getCalledValue();
// Skip actual functions for direct function calls.
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
Pointers.insert(Callee);
// Consider formals.
- for (Use &DataOp : CS.data_ops())
+ for (Use &DataOp : Call->data_ops())
if (isInterestingPointer(DataOp))
Pointers.insert(DataOp);
- CallSites.insert(CS);
+ Calls.insert(Call);
} else {
// Consider all operands.
for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
@@ -136,19 +135,21 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
if (PrintAll || PrintNoAlias || PrintMayAlias || PrintPartialAlias ||
PrintMustAlias || PrintNoModRef || PrintMod || PrintRef || PrintModRef)
errs() << "Function: " << F.getName() << ": " << Pointers.size()
- << " pointers, " << CallSites.size() << " call sites\n";
+ << " pointers, " << Calls.size() << " call sites\n";
// iterate over the worklist, and run the full (n^2)/2 disambiguations
for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
I1 != E; ++I1) {
- uint64_t I1Size = MemoryLocation::UnknownSize;
+ auto I1Size = LocationSize::unknown();
Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
- if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy);
+ if (I1ElTy->isSized())
+ I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));
for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
- uint64_t I2Size = MemoryLocation::UnknownSize;
- Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType();
- if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);
+ auto I2Size = LocationSize::unknown();
+ Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
+ if (I2ElTy->isSized())
+ I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));
AliasResult AR = AA.alias(*I1, I1Size, *I2, I2Size);
switch (AR) {
@@ -228,49 +229,48 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
}
// Mod/ref alias analysis: compare all pairs of calls and values
- for (CallSite C : CallSites) {
- Instruction *I = C.getInstruction();
-
+ for (CallBase *Call : Calls) {
for (auto Pointer : Pointers) {
- uint64_t Size = MemoryLocation::UnknownSize;
+ auto Size = LocationSize::unknown();
Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType();
- if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);
+ if (ElTy->isSized())
+ Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));
- switch (AA.getModRefInfo(C, Pointer, Size)) {
+ switch (AA.getModRefInfo(Call, Pointer, Size)) {
case ModRefInfo::NoModRef:
- PrintModRefResults("NoModRef", PrintNoModRef, I, Pointer,
+ PrintModRefResults("NoModRef", PrintNoModRef, Call, Pointer,
F.getParent());
++NoModRefCount;
break;
case ModRefInfo::Mod:
- PrintModRefResults("Just Mod", PrintMod, I, Pointer, F.getParent());
+ PrintModRefResults("Just Mod", PrintMod, Call, Pointer, F.getParent());
++ModCount;
break;
case ModRefInfo::Ref:
- PrintModRefResults("Just Ref", PrintRef, I, Pointer, F.getParent());
+ PrintModRefResults("Just Ref", PrintRef, Call, Pointer, F.getParent());
++RefCount;
break;
case ModRefInfo::ModRef:
- PrintModRefResults("Both ModRef", PrintModRef, I, Pointer,
+ PrintModRefResults("Both ModRef", PrintModRef, Call, Pointer,
F.getParent());
++ModRefCount;
break;
case ModRefInfo::Must:
- PrintModRefResults("Must", PrintMust, I, Pointer, F.getParent());
+ PrintModRefResults("Must", PrintMust, Call, Pointer, F.getParent());
++MustCount;
break;
case ModRefInfo::MustMod:
- PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, I, Pointer,
+ PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, Call, Pointer,
F.getParent());
++MustModCount;
break;
case ModRefInfo::MustRef:
- PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, I, Pointer,
+ PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, Call, Pointer,
F.getParent());
++MustRefCount;
break;
case ModRefInfo::MustModRef:
- PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, I,
+ PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, Call,
Pointer, F.getParent());
++MustModRefCount;
break;
@@ -279,44 +279,46 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
}
// Mod/ref alias analysis: compare all pairs of calls
- for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
- for (auto D = CallSites.begin(); D != Ce; ++D) {
- if (D == C)
+ for (CallBase *CallA : Calls) {
+ for (CallBase *CallB : Calls) {
+ if (CallA == CallB)
continue;
- switch (AA.getModRefInfo(*C, *D)) {
+ switch (AA.getModRefInfo(CallA, CallB)) {
case ModRefInfo::NoModRef:
- PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ PrintModRefResults("NoModRef", PrintNoModRef, CallA, CallB,
+ F.getParent());
++NoModRefCount;
break;
case ModRefInfo::Mod:
- PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ PrintModRefResults("Just Mod", PrintMod, CallA, CallB, F.getParent());
++ModCount;
break;
case ModRefInfo::Ref:
- PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ PrintModRefResults("Just Ref", PrintRef, CallA, CallB, F.getParent());
++RefCount;
break;
case ModRefInfo::ModRef:
- PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ PrintModRefResults("Both ModRef", PrintModRef, CallA, CallB,
+ F.getParent());
++ModRefCount;
break;
case ModRefInfo::Must:
- PrintModRefResults("Must", PrintMust, *C, *D, F.getParent());
+ PrintModRefResults("Must", PrintMust, CallA, CallB, F.getParent());
++MustCount;
break;
case ModRefInfo::MustMod:
- PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, *C, *D,
+ PrintModRefResults("Just Mod (MustAlias)", PrintMustMod, CallA, CallB,
F.getParent());
++MustModCount;
break;
case ModRefInfo::MustRef:
- PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, *C, *D,
+ PrintModRefResults("Just Ref (MustAlias)", PrintMustRef, CallA, CallB,
F.getParent());
++MustRefCount;
break;
case ModRefInfo::MustModRef:
- PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, *C, *D,
- F.getParent());
+ PrintModRefResults("Both ModRef (MustAlias)", PrintMustModRef, CallA,
+ CallB, F.getParent());
++MustModRefCount;
break;
}
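
Besides the CallSite-to-CallBase switch, the evaluator hunks above replace raw uint64_t sizes (with MemoryLocation::UnknownSize as a sentinel) by the LocationSize wrapper. A minimal sketch of that idiom, assuming the LocationSize API used in the hunks; the helper name is illustrative:

#include "llvm/Analysis/MemoryLocation.h" // llvm::LocationSize
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Mirrors the I1Size/I2Size computation in AAEvaluator::runInternal above.
static LocationSize pointeeSize(Type *ElTy, const DataLayout &DL) {
  if (!ElTy->isSized())
    return LocationSize::unknown();                        // size not known
  return LocationSize::precise(DL.getTypeStoreSize(ElTy)); // exact byte count
}
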
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 8f903fa4f1e8..f6ad704cc914 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -13,9 +13,9 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Config/llvm-config.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -24,6 +24,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -55,7 +56,6 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
// Update the alias and access types of this set...
Access |= AS.Access;
Alias |= AS.Alias;
- Volatile |= AS.Volatile;
if (Alias == SetMustAlias) {
// Check that these two merged sets really are must aliases. Since both
@@ -113,10 +113,9 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) {
if (AliasSet *Fwd = AS->Forward) {
Fwd->dropRef(*this);
AS->Forward = nullptr;
- }
-
- if (AS->Alias == AliasSet::SetMayAlias)
- TotalMayAliasSetSize -= AS->size();
+ } else // Update TotalMayAliasSetSize only if not forwarding.
+ if (AS->Alias == AliasSet::SetMayAlias)
+ TotalMayAliasSetSize -= AS->size();
AliasSets.erase(AS);
}
@@ -169,7 +168,12 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
addRef();
UnknownInsts.emplace_back(I);
- if (!I->mayWriteToMemory()) {
+ // Guards are marked as modifying memory for control flow modelling purposes,
+ // but don't actually modify any specific memory location.
+ using namespace PatternMatch;
+ bool MayWriteMemory = I->mayWriteToMemory() && !isGuard(I) &&
+ !(I->use_empty() && match(I, m_Intrinsic<Intrinsic::invariant_start>()));
+ if (!MayWriteMemory) {
Alias = SetMayAlias;
Access |= RefAccess;
return;
@@ -226,12 +230,13 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
if (AliasAny)
return true;
- if (!Inst->mayReadOrWriteMemory())
- return false;
+ assert(Inst->mayReadOrWriteMemory() &&
+ "Instruction must either read or write memory.");
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
if (auto *UnknownInst = getUnknownInst(i)) {
- ImmutableCallSite C1(UnknownInst), C2(Inst);
+ const auto *C1 = dyn_cast<CallBase>(UnknownInst);
+ const auto *C2 = dyn_cast<CallBase>(Inst);
if (!C1 || !C2 || isModOrRefSet(AA.getModRefInfo(C1, C2)) ||
isModOrRefSet(AA.getModRefInfo(C2, C1)))
return true;
@@ -246,6 +251,31 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
return false;
}
+Instruction* AliasSet::getUniqueInstruction() {
+ if (AliasAny)
+ // May have collapsed alias set
+ return nullptr;
+ if (begin() != end()) {
+ if (!UnknownInsts.empty())
+ // Another instruction found
+ return nullptr;
+ if (std::next(begin()) != end())
+ // Another instruction found
+ return nullptr;
+ Value *Addr = begin()->getValue();
+ assert(!Addr->user_empty() &&
+ "where's the instruction which added this pointer?");
+ if (std::next(Addr->user_begin()) != Addr->user_end())
+ // Another instruction found -- this is really restrictive
+ // TODO: generalize!
+ return nullptr;
+ return cast<Instruction>(*(Addr->user_begin()));
+ }
+ if (1 != UnknownInsts.size())
+ return nullptr;
+ return cast<Instruction>(UnknownInsts[0]);
+}
+
void AliasSetTracker::clear() {
// Delete all the PointerRec entries.
for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end();
@@ -280,13 +310,6 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
return FoundSet;
}
-bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
- for (const AliasSet &AS : *this)
- if (!AS.Forward && AS.aliasesUnknownInst(Inst, AA))
- return true;
- return false;
-}
-
AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
AliasSet *FoundSet = nullptr;
for (iterator I = begin(), E = end(); I != E;) {
@@ -295,17 +318,18 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
continue;
if (!FoundSet) // If this is the first alias set ptr can go into.
FoundSet = &*Cur; // Remember it.
- else if (!Cur->Forward) // Otherwise, we must merge the sets.
+ else // Otherwise, we must merge the sets.
FoundSet->mergeSetIn(*Cur, *this); // Merge in contents.
}
return FoundSet;
}
-/// getAliasSetForPointer - Return the alias set that the specified pointer
-/// lives in.
-AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer,
- LocationSize Size,
- const AAMDNodes &AAInfo) {
+AliasSet &AliasSetTracker::getAliasSetFor(const MemoryLocation &MemLoc) {
+
+ Value * const Pointer = const_cast<Value*>(MemLoc.Ptr);
+ const LocationSize Size = MemLoc.Size;
+ const AAMDNodes &AAInfo = MemLoc.AATags;
+
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
if (AliasAnyAS) {
@@ -351,83 +375,32 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer,
void AliasSetTracker::add(Value *Ptr, LocationSize Size,
const AAMDNodes &AAInfo) {
- addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess);
+ addPointer(MemoryLocation(Ptr, Size, AAInfo), AliasSet::NoAccess);
}
void AliasSetTracker::add(LoadInst *LI) {
- if (isStrongerThanMonotonic(LI->getOrdering())) return addUnknown(LI);
-
- AAMDNodes AAInfo;
- LI->getAAMetadata(AAInfo);
-
- AliasSet::AccessLattice Access = AliasSet::RefAccess;
- const DataLayout &DL = LI->getModule()->getDataLayout();
- AliasSet &AS = addPointer(LI->getOperand(0),
- DL.getTypeStoreSize(LI->getType()), AAInfo, Access);
- if (LI->isVolatile()) AS.setVolatile();
+ if (isStrongerThanMonotonic(LI->getOrdering()))
+ return addUnknown(LI);
+ addPointer(MemoryLocation::get(LI), AliasSet::RefAccess);
}
void AliasSetTracker::add(StoreInst *SI) {
- if (isStrongerThanMonotonic(SI->getOrdering())) return addUnknown(SI);
-
- AAMDNodes AAInfo;
- SI->getAAMetadata(AAInfo);
-
- AliasSet::AccessLattice Access = AliasSet::ModAccess;
- const DataLayout &DL = SI->getModule()->getDataLayout();
- Value *Val = SI->getOperand(0);
- AliasSet &AS = addPointer(
- SI->getOperand(1), DL.getTypeStoreSize(Val->getType()), AAInfo, Access);
- if (SI->isVolatile()) AS.setVolatile();
+ if (isStrongerThanMonotonic(SI->getOrdering()))
+ return addUnknown(SI);
+ addPointer(MemoryLocation::get(SI), AliasSet::ModAccess);
}
void AliasSetTracker::add(VAArgInst *VAAI) {
- AAMDNodes AAInfo;
- VAAI->getAAMetadata(AAInfo);
-
- addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo,
- AliasSet::ModRefAccess);
+ addPointer(MemoryLocation::get(VAAI), AliasSet::ModRefAccess);
}
void AliasSetTracker::add(AnyMemSetInst *MSI) {
- AAMDNodes AAInfo;
- MSI->getAAMetadata(AAInfo);
-
- uint64_t Len;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength()))
- Len = C->getZExtValue();
- else
- Len = MemoryLocation::UnknownSize;
-
- AliasSet &AS =
- addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
- auto *MS = dyn_cast<MemSetInst>(MSI);
- if (MS && MS->isVolatile())
- AS.setVolatile();
+ addPointer(MemoryLocation::getForDest(MSI), AliasSet::ModAccess);
}
void AliasSetTracker::add(AnyMemTransferInst *MTI) {
- AAMDNodes AAInfo;
- MTI->getAAMetadata(AAInfo);
-
- uint64_t Len;
- if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
- Len = C->getZExtValue();
- else
- Len = MemoryLocation::UnknownSize;
-
- AliasSet &ASSrc =
- addPointer(MTI->getRawSource(), Len, AAInfo, AliasSet::RefAccess);
-
- AliasSet &ASDst =
- addPointer(MTI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
-
- auto* MT = dyn_cast<MemTransferInst>(MTI);
- if (MT && MT->isVolatile()) {
- ASSrc.setVolatile();
- ASDst.setVolatile();
- }
+ addPointer(MemoryLocation::getForDest(MTI), AliasSet::ModAccess);
+ addPointer(MemoryLocation::getForSource(MTI), AliasSet::RefAccess);
}
void AliasSetTracker::addUnknown(Instruction *Inst) {
@@ -471,6 +444,46 @@ void AliasSetTracker::add(Instruction *I) {
return add(MSI);
if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(I))
return add(MTI);
+
+ // Handle all calls with known mod/ref sets generically
+ if (auto *Call = dyn_cast<CallBase>(I))
+ if (Call->onlyAccessesArgMemory()) {
+ auto getAccessFromModRef = [](ModRefInfo MRI) {
+ if (isRefSet(MRI) && isModSet(MRI))
+ return AliasSet::ModRefAccess;
+ else if (isModSet(MRI))
+ return AliasSet::ModAccess;
+ else if (isRefSet(MRI))
+ return AliasSet::RefAccess;
+ else
+ return AliasSet::NoAccess;
+ };
+
+ ModRefInfo CallMask = createModRefInfo(AA.getModRefBehavior(Call));
+
+ // Some intrinsics are marked as modifying memory for control flow
+ // modelling purposes, but don't actually modify any specific memory
+ // location.
+ using namespace PatternMatch;
+ if (Call->use_empty() &&
+ match(Call, m_Intrinsic<Intrinsic::invariant_start>()))
+ CallMask = clearMod(CallMask);
+
+ for (auto IdxArgPair : enumerate(Call->args())) {
+ int ArgIdx = IdxArgPair.index();
+ const Value *Arg = IdxArgPair.value();
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ MemoryLocation ArgLoc =
+ MemoryLocation::getForArgument(Call, ArgIdx, nullptr);
+ ModRefInfo ArgMask = AA.getArgModRefInfo(Call, ArgIdx);
+ ArgMask = intersectModRef(CallMask, ArgMask);
+ if (!isNoModRef(ArgMask))
+ addPointer(ArgLoc, getAccessFromModRef(ArgMask));
+ }
+ return;
+ }
+
return addUnknown(I);
}
@@ -496,12 +509,10 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
add(Inst);
// Loop over all of the pointers in this alias set.
- for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
- AliasSet &NewAS =
- addPointer(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo(),
- (AliasSet::AccessLattice)AS.Access);
- if (AS.isVolatile()) NewAS.setVolatile();
- }
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI)
+ addPointer(
+ MemoryLocation(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo()),
+ (AliasSet::AccessLattice)AS.Access);
}
}
@@ -594,10 +605,9 @@ AliasSet &AliasSetTracker::mergeAllAliasSets() {
return *AliasAnyAS;
}
-AliasSet &AliasSetTracker::addPointer(Value *P, LocationSize Size,
- const AAMDNodes &AAInfo,
+AliasSet &AliasSetTracker::addPointer(MemoryLocation Loc,
AliasSet::AccessLattice E) {
- AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo);
+ AliasSet &AS = getAliasSetFor(Loc);
AS.Access |= E;
if (!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold)) {
@@ -623,7 +633,6 @@ void AliasSet::print(raw_ostream &OS) const {
case ModRefAccess: OS << "Mod/Ref "; break;
default: llvm_unreachable("Bad value for Access!");
}
- if (isVolatile()) OS << "[volatile] ";
if (Forward)
OS << " forwarding to " << (void*)Forward;
@@ -632,7 +641,10 @@ void AliasSet::print(raw_ostream &OS) const {
for (iterator I = begin(), E = end(); I != E; ++I) {
if (I != begin()) OS << ", ";
I.getPointer()->printAsOperand(OS << "(");
- OS << ", " << I.getSize() << ")";
+ if (I.getSize() == LocationSize::unknown())
+ OS << ", unknown)";
+ else
+ OS << ", " << I.getSize() << ")";
}
}
if (!UnknownInsts.empty()) {
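
The add(LoadInst*) and add(StoreInst*) rewrites above lean on MemoryLocation::get(), which bundles the pointer operand, the precise access size, and the AA metadata in one call before the location is handed to addPointer(). A small sketch of that usage under the same API; the helper name is illustrative, not from the patch:

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Derive the MemoryLocation the tracker would file under an alias set.
static MemoryLocation locationForAccess(Instruction &I) {
  if (auto *LI = dyn_cast<LoadInst>(&I))
    return MemoryLocation::get(LI); // pointer operand, store size, AA tags
  if (auto *SI = dyn_cast<StoreInst>(&I))
    return MemoryLocation::get(SI); // location written by the store
  return MemoryLocation();          // empty/unknown location otherwise
}
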
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 30576cf1ae10..bb8742123a0f 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -39,7 +39,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeDependenceAnalysisWrapperPassPass(Registry);
initializeDelinearizationPass(Registry);
initializeDemandedBitsWrapperPassPass(Registry);
- initializeDivergenceAnalysisPass(Registry);
initializeDominanceFrontierWrapperPassPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
@@ -58,6 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
initializeLazyValueInfoPrinterPass(Registry);
+ initializeLegacyDivergenceAnalysisPass(Registry);
initializeLintPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
@@ -77,6 +77,8 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeRegionOnlyPrinterPass(Registry);
initializeSCEVAAWrapperPassPass(Registry);
initializeScalarEvolutionWrapperPassPass(Registry);
+ initializeStackSafetyGlobalInfoWrapperPassPass(Registry);
+ initializeStackSafetyInfoWrapperPassPass(Registry);
initializeTargetTransformInfoWrapperPassPass(Registry);
initializeTypeBasedAAWrapperPassPass(Registry);
initializeScopedNoAliasAAWrapperPassPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 1a24ae3dba15..332eeaa00e73 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -31,7 +31,6 @@
#include "llvm/Analysis/PhiValues.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -68,6 +67,16 @@ using namespace llvm;
/// Enable analysis of recursive PHI nodes.
static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
cl::init(false));
+
+/// By default, even on 32-bit architectures we use 64-bit integers for
+/// calculations. This will allow us to more-aggressively decompose indexing
+/// expressions calculated using i64 values (e.g., long long in C) which is
+/// common enough to worry about.
+static cl::opt<bool> ForceAtLeast64Bits("basicaa-force-at-least-64b",
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> DoubleCalcBits("basicaa-double-calc-bits",
+ cl::Hidden, cl::init(false));
+
/// SearchLimitReached / SearchTimes shows how often the limit of
/// to decompose GEPs is reached. It will affect the precision
/// of basic alias analysis.
@@ -134,7 +143,7 @@ static bool isNonEscapingLocalObject(const Value *V) {
/// Returns true if the pointer is one which would have been considered an
/// escape by isNonEscapingLocalObject.
static bool isEscapeSource(const Value *V) {
- if (ImmutableCallSite(V))
+ if (isa<CallBase>(V))
return true;
if (isa<Argument>(V))
@@ -381,13 +390,22 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
}
/// To ensure a pointer offset fits in an integer of size PointerSize
-/// (in bits) when that size is smaller than 64. This is an issue in
-/// particular for 32b programs with negative indices that rely on two's
-/// complement wrap-arounds for precise alias information.
-static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) {
- assert(PointerSize <= 64 && "Invalid PointerSize!");
- unsigned ShiftBits = 64 - PointerSize;
- return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits;
+/// (in bits) when that size is smaller than the maximum pointer size. This is
+/// an issue, for example, in particular for 32b pointers with negative indices
+/// that rely on two's complement wrap-arounds for precise alias information
+/// where the maximum pointer size is 64b.
+static APInt adjustToPointerSize(APInt Offset, unsigned PointerSize) {
+ assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!");
+ unsigned ShiftBits = Offset.getBitWidth() - PointerSize;
+ return (Offset << ShiftBits).ashr(ShiftBits);
+}
+
+static unsigned getMaxPointerSize(const DataLayout &DL) {
+ unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
+ if (MaxPointerSize < 64 && ForceAtLeast64Bits) MaxPointerSize = 64;
+ if (DoubleCalcBits) MaxPointerSize *= 2;
+
+ return MaxPointerSize;
}
/// If V is a symbolic pointer expression, decompose it into a base pointer
@@ -410,8 +428,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
unsigned MaxLookup = MaxLookupSearchDepth;
SearchTimes++;
- Decomposed.StructOffset = 0;
- Decomposed.OtherOffset = 0;
+ unsigned MaxPointerSize = getMaxPointerSize(DL);
Decomposed.VarIndices.clear();
do {
// See if this is a bitcast or GEP.
@@ -436,7 +453,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
if (!GEPOp) {
- if (auto CS = ImmutableCallSite(V)) {
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
// CaptureTracking can know about special capturing properties of some
// intrinsics like launder.invariant.group, that can't be expressed with
// the attributes, but have properties like returning aliasing pointer.
@@ -446,7 +463,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
V = RP;
continue;
}
@@ -501,13 +518,15 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (CIdx->isZero())
continue;
Decomposed.OtherOffset +=
- DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue();
+ (DL.getTypeAllocSize(GTI.getIndexedType()) *
+ CIdx->getValue().sextOrSelf(MaxPointerSize))
+ .sextOrTrunc(MaxPointerSize);
continue;
}
GepHasConstantOffset = false;
- uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType());
+ APInt Scale(MaxPointerSize, DL.getTypeAllocSize(GTI.getIndexedType()));
unsigned ZExtBits = 0, SExtBits = 0;
// If the integer type is smaller than the pointer size, it is implicitly
@@ -519,20 +538,34 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
bool NSW = true, NUW = true;
+ const Value *OrigIndex = Index;
Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
SExtBits, DL, 0, AC, DT, NSW, NUW);
- // All GEP math happens in the width of the pointer type,
- // so we can truncate the value to 64-bits as we don't handle
- // currently pointers larger than 64 bits and we would crash
- // later. TODO: Make `Scale` an APInt to avoid this problem.
- if (IndexScale.getBitWidth() > 64)
- IndexScale = IndexScale.sextOrTrunc(64);
-
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
- Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale;
- Scale *= IndexScale.getSExtValue();
+
+ // It can be the case that, even though C1*V+C2 does not overflow for
+ // relevant values of V, (C2*Scale) can overflow. In that case, we cannot
+ // decompose the expression in this way.
+ //
+ // FIXME: C1*Scale and the other operations in the decomposed
+ // (C1*Scale)*V+C2*Scale can also overflow. We should check for this
+ // possibility.
+ APInt WideScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize*2) *
+ Scale.sext(MaxPointerSize*2);
+ if (WideScaledOffset.getMinSignedBits() > MaxPointerSize) {
+ Index = OrigIndex;
+ IndexScale = 1;
+ IndexOffset = 0;
+
+ ZExtBits = SExtBits = 0;
+ if (PointerSize > Width)
+ SExtBits += PointerSize - Width;
+ } else {
+ Decomposed.OtherOffset += IndexOffset.sextOrTrunc(MaxPointerSize) * Scale;
+ Scale *= IndexScale.sextOrTrunc(MaxPointerSize);
+ }
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
@@ -552,9 +585,8 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// pointer size.
Scale = adjustToPointerSize(Scale, PointerSize);
- if (Scale) {
- VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
- static_cast<int64_t>(Scale)};
+ if (!!Scale) {
+ VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, Scale};
Decomposed.VarIndices.push_back(Entry);
}
}
@@ -640,8 +672,8 @@ bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
}
/// Returns the behavior when calling the given call site.
-FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
- if (CS.doesNotAccessMemory())
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(const CallBase *Call) {
+ if (Call->doesNotAccessMemory())
// Can't do better than this.
return FMRB_DoesNotAccessMemory;
@@ -649,23 +681,23 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
// If the callsite knows it only reads memory, don't return worse
// than that.
- if (CS.onlyReadsMemory())
+ if (Call->onlyReadsMemory())
Min = FMRB_OnlyReadsMemory;
- else if (CS.doesNotReadMemory())
+ else if (Call->doesNotReadMemory())
Min = FMRB_DoesNotReadMemory;
- if (CS.onlyAccessesArgMemory())
+ if (Call->onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
- else if (CS.onlyAccessesInaccessibleMemory())
+ else if (Call->onlyAccessesInaccessibleMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem);
- else if (CS.onlyAccessesInaccessibleMemOrArgMem())
+ else if (Call->onlyAccessesInaccessibleMemOrArgMem())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem);
- // If CS has operand bundles then aliasing attributes from the function it
- // calls do not directly apply to the CallSite. This can be made more
- // precise in the future.
- if (!CS.hasOperandBundles())
- if (const Function *F = CS.getCalledFunction())
+ // If the call has operand bundles then aliasing attributes from the function
+ // it calls do not directly apply to the call. This can be made more precise
+ // in the future.
+ if (!Call->hasOperandBundles())
+ if (const Function *F = Call->getCalledFunction())
Min =
FunctionModRefBehavior(Min & getBestAAResults().getModRefBehavior(F));
@@ -698,9 +730,9 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
}
/// Returns true if this is a writeonly (i.e Mod only) parameter.
-static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
+static bool isWriteOnlyParam(const CallBase *Call, unsigned ArgIdx,
const TargetLibraryInfo &TLI) {
- if (CS.paramHasAttr(ArgIdx, Attribute::WriteOnly))
+ if (Call->paramHasAttr(ArgIdx, Attribute::WriteOnly))
return true;
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -710,7 +742,8 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
// FIXME Consider handling this in InferFunctionAttr.cpp together with other
// attributes.
LibFunc F;
- if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
+ if (Call->getCalledFunction() &&
+ TLI.getLibFunc(*Call->getCalledFunction(), F) &&
F == LibFunc_memset_pattern16 && TLI.has(F))
if (ArgIdx == 0)
return true;
@@ -722,23 +755,23 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
return false;
}
-ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+ModRefInfo BasicAAResult::getArgModRefInfo(const CallBase *Call,
unsigned ArgIdx) {
// Checking for known builtin intrinsics and target library functions.
- if (isWriteOnlyParam(CS, ArgIdx, TLI))
+ if (isWriteOnlyParam(Call, ArgIdx, TLI))
return ModRefInfo::Mod;
- if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly))
+ if (Call->paramHasAttr(ArgIdx, Attribute::ReadOnly))
return ModRefInfo::Ref;
- if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone))
+ if (Call->paramHasAttr(ArgIdx, Attribute::ReadNone))
return ModRefInfo::NoModRef;
- return AAResultBase::getArgModRefInfo(CS, ArgIdx);
+ return AAResultBase::getArgModRefInfo(Call, ArgIdx);
}
-static bool isIntrinsicCall(ImmutableCallSite CS, Intrinsic::ID IID) {
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+static bool isIntrinsicCall(const CallBase *Call, Intrinsic::ID IID) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call);
return II && II->getIntrinsicID() == IID;
}
@@ -794,27 +827,34 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
/// Since we only look at local properties of this function, we really can't
/// say much about this query. We do, however, use simple "address taken"
/// analysis on local objects.
-ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
- assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
+ assert(notDifferentParent(Call, Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
- // If this is a tail call and Loc.Ptr points to a stack location, we know that
- // the tail call cannot access or modify the local stack.
- // We cannot exclude byval arguments here; these belong to the caller of
- // the current function not to the current function, and a tail callee
- // may reference them.
+ // Calls marked 'tail' cannot read or write allocas from the current frame
+ // because the current frame might be destroyed by the time they run. However,
+ // a tail call may use an alloca with byval. Calling with byval copies the
+ // contents of the alloca into argument registers or stack slots, so there is
+ // no lifetime issue.
if (isa<AllocaInst>(Object))
- if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
- if (CI->isTailCall())
+ if (const CallInst *CI = dyn_cast<CallInst>(Call))
+ if (CI->isTailCall() &&
+ !CI->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return ModRefInfo::NoModRef;
+ // Stack restore is able to modify unescaped dynamic allocas. Assume it may
+ // modify them even though the alloca is not escaped.
+ if (auto *AI = dyn_cast<AllocaInst>(Object))
+ if (!AI->isStaticAlloca() && isIntrinsicCall(Call, Intrinsic::stackrestore))
+ return ModRefInfo::Mod;
+
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
- if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
+ if (!isa<Constant>(Object) && Call != Object &&
isNonEscapingLocalObject(Object)) {
// Optimistically assume that call doesn't touch Object and check this
@@ -823,19 +863,20 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
bool IsMustAlias = true;
unsigned OperandNo = 0;
- for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
+ for (auto CI = Call->data_operands_begin(), CE = Call->data_operands_end();
CI != CE; ++CI, ++OperandNo) {
// Only look at the no-capture or byval pointer arguments. If this
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(OperandNo) &&
- OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo)))
+ (!Call->doesNotCapture(OperandNo) &&
+ OperandNo < Call->getNumArgOperands() &&
+ !Call->isByValArgument(OperandNo)))
continue;
// Call doesn't access memory through this operand, so we don't care
// if it aliases with Object.
- if (CS.doesNotAccessMemory(OperandNo))
+ if (Call->doesNotAccessMemory(OperandNo))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -849,12 +890,12 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
continue;
// Operand aliases 'Object', but call doesn't modify it. Strengthen
// initial assumption and keep looking in case if there are more aliases.
- if (CS.onlyReadsMemory(OperandNo)) {
+ if (Call->onlyReadsMemory(OperandNo)) {
Result = setRef(Result);
continue;
}
// Operand aliases 'Object' but call only writes into it.
- if (CS.doesNotReadMemory(OperandNo)) {
+ if (Call->doesNotReadMemory(OperandNo)) {
Result = setMod(Result);
continue;
}
@@ -878,17 +919,16 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
}
}
- // If the CallSite is to malloc or calloc, we can assume that it doesn't
+ // If the call is to malloc or calloc, we can assume that it doesn't
// modify any IR visible value. This is only valid because we assume these
// routines do not read values visible in the IR. TODO: Consider special
// casing realloc and strdup routines which access only their arguments as
// well. Or alternatively, replace all of this with inaccessiblememonly once
// that's implemented fully.
- auto *Inst = CS.getInstruction();
- if (isMallocOrCallocLikeFn(Inst, &TLI)) {
+ if (isMallocOrCallocLikeFn(Call, &TLI)) {
// Be conservative if the accessed pointer may alias the allocation -
// fallback to the generic handling below.
- if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias)
+ if (getBestAAResults().alias(MemoryLocation(Call), Loc) == NoAlias)
return ModRefInfo::NoModRef;
}
@@ -896,7 +936,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// operands, i.e., source and destination of any given memcpy must no-alias.
// If Loc must-aliases either one of these two locations, then it necessarily
// no-aliases the other.
- if (auto *Inst = dyn_cast<AnyMemCpyInst>(CS.getInstruction())) {
+ if (auto *Inst = dyn_cast<AnyMemCpyInst>(Call)) {
AliasResult SrcAA, DestAA;
if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
@@ -920,7 +960,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
- if (isIntrinsicCall(CS, Intrinsic::assume))
+ if (isIntrinsicCall(Call, Intrinsic::assume))
return ModRefInfo::NoModRef;
// Like assumes, guard intrinsics are also marked as arbitrarily writing so
@@ -930,7 +970,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// *Unlike* assumes, guard intrinsics are modeled as reading memory since the
// heap state at the point the guard is issued needs to be consistent in case
// the guard invokes the "deopt" continuation.
- if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
+ if (isIntrinsicCall(Call, Intrinsic::experimental_guard))
return ModRefInfo::Ref;
// Like assumes, invariant.start intrinsics were also marked as arbitrarily
@@ -956,20 +996,20 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// The transformation will cause the second store to be ignored (based on
// rules of invariant.start) and print 40, while the first program always
// prints 50.
- if (isIntrinsicCall(CS, Intrinsic::invariant_start))
+ if (isIntrinsicCall(Call, Intrinsic::invariant_start))
return ModRefInfo::Ref;
// The AAResultBase base class has some smarts, lets use them.
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
-ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
- if (isIntrinsicCall(CS1, Intrinsic::assume) ||
- isIntrinsicCall(CS2, Intrinsic::assume))
+ if (isIntrinsicCall(Call1, Intrinsic::assume) ||
+ isIntrinsicCall(Call2, Intrinsic::assume))
return ModRefInfo::NoModRef;
// Like assumes, guard intrinsics are also marked as arbitrarily writing so
@@ -983,26 +1023,26 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
// NB! This function is *not* commutative, so we special case two
// possibilities for guard intrinsics.
- if (isIntrinsicCall(CS1, Intrinsic::experimental_guard))
- return isModSet(createModRefInfo(getModRefBehavior(CS2)))
+ if (isIntrinsicCall(Call1, Intrinsic::experimental_guard))
+ return isModSet(createModRefInfo(getModRefBehavior(Call2)))
? ModRefInfo::Ref
: ModRefInfo::NoModRef;
- if (isIntrinsicCall(CS2, Intrinsic::experimental_guard))
- return isModSet(createModRefInfo(getModRefBehavior(CS1)))
+ if (isIntrinsicCall(Call2, Intrinsic::experimental_guard))
+ return isModSet(createModRefInfo(getModRefBehavior(Call1)))
? ModRefInfo::Mod
: ModRefInfo::NoModRef;
   // The AAResultBase base class has some smarts, let's use them.
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
}
/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
/// both having the exact same pointer operand.
static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
- LocationSize V1Size,
+ LocationSize MaybeV1Size,
const GEPOperator *GEP2,
- LocationSize V2Size,
+ LocationSize MaybeV2Size,
const DataLayout &DL) {
assert(GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
@@ -1018,10 +1058,13 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// If we don't know the size of the accesses through both GEPs, we can't
// determine whether the struct fields accessed can't alias.
- if (V1Size == MemoryLocation::UnknownSize ||
- V2Size == MemoryLocation::UnknownSize)
+ if (MaybeV1Size == LocationSize::unknown() ||
+ MaybeV2Size == LocationSize::unknown())
return MayAlias;
+ const uint64_t V1Size = MaybeV1Size.getValue();
+ const uint64_t V2Size = MaybeV2Size.getValue();
+
ConstantInt *C1 =
dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
ConstantInt *C2 =
@@ -1029,8 +1072,12 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// If the last (struct) indices are constants and are equal, the other indices
   // might also be dynamically equal, so the GEPs can alias.
- if (C1 && C2 && C1->getSExtValue() == C2->getSExtValue())
- return MayAlias;
+ if (C1 && C2) {
+ unsigned BitWidth = std::max(C1->getBitWidth(), C2->getBitWidth());
+ if (C1->getValue().sextOrSelf(BitWidth) ==
+ C2->getValue().sextOrSelf(BitWidth))
+ return MayAlias;
+ }
// Find the last-indexed type of the GEP, i.e., the type you'd get if
// you stripped the last index.
@@ -1113,6 +1160,10 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
return MayAlias;
}
+ if (C1->getValue().getActiveBits() > 64 ||
+ C2->getValue().getActiveBits() > 64)
+ return MayAlias;
+
// We know that:
// - both GEPs begin indexing from the exact same pointer;
// - the last indices in both GEPs are constants, indexing into a struct;
@@ -1178,11 +1229,13 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
// than (%alloca - 1), and so is not inbounds, a contradiction.
bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
- LocationSize ObjectAccessSize) {
+ LocationSize MaybeObjectAccessSize) {
// If the object access size is unknown, or the GEP isn't inbounds, bail.
- if (ObjectAccessSize == MemoryLocation::UnknownSize || !GEPOp->isInBounds())
+ if (MaybeObjectAccessSize == LocationSize::unknown() || !GEPOp->isInBounds())
return false;
+ const uint64_t ObjectAccessSize = MaybeObjectAccessSize.getValue();
+
// We need the object to be an alloca or a globalvariable, and want to know
// the offset of the pointer from the object precisely, so no variable
// indices are allowed.
@@ -1191,8 +1244,8 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
!DecompObject.VarIndices.empty())
return false;
- int64_t ObjectBaseOffset = DecompObject.StructOffset +
- DecompObject.OtherOffset;
+ APInt ObjectBaseOffset = DecompObject.StructOffset +
+ DecompObject.OtherOffset;
// If the GEP has no variable indices, we know the precise offset
// from the base, then use it. If the GEP has variable indices,
@@ -1200,10 +1253,11 @@ bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
// false in that case.
if (!DecompGEP.VarIndices.empty())
return false;
- int64_t GEPBaseOffset = DecompGEP.StructOffset;
+
+ APInt GEPBaseOffset = DecompGEP.StructOffset;
GEPBaseOffset += DecompGEP.OtherOffset;
- return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize);
+ return GEPBaseOffset.sge(ObjectBaseOffset + (int64_t)ObjectAccessSize);
}
/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
@@ -1218,13 +1272,17 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
const Value *UnderlyingV1, const Value *UnderlyingV2) {
DecomposedGEP DecompGEP1, DecompGEP2;
+ unsigned MaxPointerSize = getMaxPointerSize(DL);
+ DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0);
+ DecompGEP2.StructOffset = DecompGEP2.OtherOffset = APInt(MaxPointerSize, 0);
+
bool GEP1MaxLookupReached =
DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT);
bool GEP2MaxLookupReached =
DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT);
- int64_t GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
- int64_t GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
+ APInt GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
+ APInt GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 &&
"DecomposeGEPExpression returned a result different from "
@@ -1247,8 +1305,8 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
return NoAlias;
// Do the base pointers alias?
AliasResult BaseAlias =
- aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize, AAMDNodes(),
- UnderlyingV2, MemoryLocation::UnknownSize, AAMDNodes());
+ aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(),
+ UnderlyingV2, LocationSize::unknown(), AAMDNodes());
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
@@ -1307,13 +1365,12 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// pointer, we know they cannot alias.
// If both accesses are unknown size, we can't do anything useful here.
- if (V1Size == MemoryLocation::UnknownSize &&
- V2Size == MemoryLocation::UnknownSize)
+ if (V1Size == LocationSize::unknown() && V2Size == LocationSize::unknown())
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
- AAMDNodes(), V2, MemoryLocation::UnknownSize,
- V2AAInfo, nullptr, UnderlyingV2);
+ AliasResult R =
+ aliasCheck(UnderlyingV1, LocationSize::unknown(), AAMDNodes(), V2,
+ LocationSize::unknown(), V2AAInfo, nullptr, UnderlyingV2);
if (R != MustAlias) {
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1343,9 +1400,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// that the objects are partially overlapping. If the difference is
// greater, we know they do not overlap.
if (GEP1BaseOffset != 0 && DecompGEP1.VarIndices.empty()) {
- if (GEP1BaseOffset >= 0) {
- if (V2Size != MemoryLocation::UnknownSize) {
- if ((uint64_t)GEP1BaseOffset < V2Size)
+ if (GEP1BaseOffset.sge(0)) {
+ if (V2Size != LocationSize::unknown()) {
+ if (GEP1BaseOffset.ult(V2Size.getValue()))
return PartialAlias;
return NoAlias;
}
@@ -1358,9 +1415,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// GEP1 V2
// We need to know that V2Size is not unknown, otherwise we might have
   // stripped a gep with negative index ('gep <ptr>, -1, ...').
- if (V1Size != MemoryLocation::UnknownSize &&
- V2Size != MemoryLocation::UnknownSize) {
- if (-(uint64_t)GEP1BaseOffset < V1Size)
+ if (V1Size != LocationSize::unknown() &&
+ V2Size != LocationSize::unknown()) {
+ if ((-GEP1BaseOffset).ult(V1Size.getValue()))
return PartialAlias;
return NoAlias;
}
@@ -1368,7 +1425,7 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
}
if (!DecompGEP1.VarIndices.empty()) {
- uint64_t Modulo = 0;
+ APInt Modulo(MaxPointerSize, 0);
bool AllPositive = true;
for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
@@ -1376,7 +1433,7 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// Grab the least significant bit set in any of the scales. We
// don't need std::abs here (even if the scale's negative) as we'll
// be ^'ing Modulo with itself later.
- Modulo |= (uint64_t)DecompGEP1.VarIndices[i].Scale;
+ Modulo |= DecompGEP1.VarIndices[i].Scale;
if (AllPositive) {
// If the Value could change between cycles, then any reasoning about
@@ -1397,9 +1454,9 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// If the variable begins with a zero then we know it's
// positive, regardless of whether the value is signed or
// unsigned.
- int64_t Scale = DecompGEP1.VarIndices[i].Scale;
+ APInt Scale = DecompGEP1.VarIndices[i].Scale;
AllPositive =
- (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+ (SignKnownZero && Scale.sge(0)) || (SignKnownOne && Scale.slt(0));
}
}
@@ -1408,16 +1465,18 @@ BasicAAResult::aliasGEP(const GEPOperator *GEP1, LocationSize V1Size,
// We can compute the difference between the two addresses
// mod Modulo. Check whether that difference guarantees that the
// two locations do not alias.
- uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
- if (V1Size != MemoryLocation::UnknownSize &&
- V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
- V1Size <= Modulo - ModOffset)
+ APInt ModOffset = GEP1BaseOffset & (Modulo - 1);
+ if (V1Size != LocationSize::unknown() &&
+ V2Size != LocationSize::unknown() && ModOffset.uge(V2Size.getValue()) &&
+ (Modulo - ModOffset).uge(V1Size.getValue()))
return NoAlias;
// If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
// If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
// don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
- if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+ if (AllPositive && GEP1BaseOffset.sgt(0) &&
+ V2Size != LocationSize::unknown() &&
+ GEP1BaseOffset.uge(V2Size.getValue()))
return NoAlias;
if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
@@ -1597,7 +1656,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
// unknown to represent all the possible values the GEP could advance the
// pointer to.
if (isRecursive)
- PNSize = MemoryLocation::UnknownSize;
+ PNSize = LocationSize::unknown();
AliasResult Alias =
aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0],
@@ -1631,7 +1690,7 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
const Value *O1, const Value *O2) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
- if (V1Size == 0 || V2Size == 0)
+ if (V1Size.isZero() || V2Size.isZero())
return NoAlias;
// Strip off any casts if they exist.
@@ -1705,10 +1764,10 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
bool NullIsValidLocation = NullPointerIsDefined(&F);
- if ((V1Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O2, V1Size, DL, TLI, NullIsValidLocation)) ||
- (V2Size != MemoryLocation::UnknownSize &&
- isObjectSmallerThan(O1, V2Size, DL, TLI, NullIsValidLocation)))
+ if ((V1Size.isPrecise() && isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI,
+ NullIsValidLocation)) ||
+ (V2Size.isPrecise() && isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI,
+ NullIsValidLocation)))
return NoAlias;
// Check the cache before climbing up use-def chains. This also terminates
@@ -1766,10 +1825,9 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// If both pointers are pointing into the same object and one of them
// accesses the entire object, then the accesses must overlap in some way.
if (O1 == O2)
- if (V1Size != MemoryLocation::UnknownSize &&
- V2Size != MemoryLocation::UnknownSize &&
- (isObjectSize(O1, V1Size, DL, TLI, NullIsValidLocation) ||
- isObjectSize(O2, V2Size, DL, TLI, NullIsValidLocation)))
+ if (V1Size.isPrecise() && V2Size.isPrecise() &&
+ (isObjectSize(O1, V1Size.getValue(), DL, TLI, NullIsValidLocation) ||
+ isObjectSize(O2, V2Size.getValue(), DL, TLI, NullIsValidLocation)))
return AliasCache[Locs] = PartialAlias;
// Recurse back into the best AA results we have, potentially with refined
@@ -1824,7 +1882,7 @@ void BasicAAResult::GetIndexDifference(
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
const Value *V = Src[i].V;
unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
- int64_t Scale = Src[i].Scale;
+ APInt Scale = Src[i].Scale;
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
@@ -1844,7 +1902,7 @@ void BasicAAResult::GetIndexDifference(
}
// If we didn't consume this entry, add it to the end of the Dest list.
- if (Scale) {
+ if (!!Scale) {
VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
Dest.push_back(Entry);
}
@@ -1852,13 +1910,16 @@ void BasicAAResult::GetIndexDifference(
}
bool BasicAAResult::constantOffsetHeuristic(
- const SmallVectorImpl<VariableGEPIndex> &VarIndices, LocationSize V1Size,
- LocationSize V2Size, int64_t BaseOffset, AssumptionCache *AC,
- DominatorTree *DT) {
- if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
- V2Size == MemoryLocation::UnknownSize)
+ const SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ LocationSize MaybeV1Size, LocationSize MaybeV2Size, APInt BaseOffset,
+ AssumptionCache *AC, DominatorTree *DT) {
+ if (VarIndices.size() != 2 || MaybeV1Size == LocationSize::unknown() ||
+ MaybeV2Size == LocationSize::unknown())
return false;
+ const uint64_t V1Size = MaybeV1Size.getValue();
+ const uint64_t V2Size = MaybeV2Size.getValue();
+
const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
@@ -1895,14 +1956,15 @@ bool BasicAAResult::constantOffsetHeuristic(
// the minimum distance between %i and %i + 5 is 3.
APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
MinDiff = APIntOps::umin(MinDiff, Wrapped);
- uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+ APInt MinDiffBytes =
+ MinDiff.zextOrTrunc(Var0.Scale.getBitWidth()) * Var0.Scale.abs();
// We can't definitely say whether GEP1 is before or after V2 due to wrapping
// arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
// values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
// V2Size can fit in the MinDiffBytes gap.
- return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
- V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+ return MinDiffBytes.uge(V1Size + BaseOffset.abs()) &&
+ MinDiffBytes.uge(V2Size + BaseOffset.abs());
}
//===----------------------------------------------------------------------===//
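
The BasicAliasAnalysis hunks above replace the old int64_t bookkeeping for decomposed GEP offsets with APInt values sized by getMaxPointerSize(DL), so adding StructOffset and OtherOffset can no longer wrap silently, and the raw comparisons become explicit signed/unsigned APInt predicates. A rough sketch of the idiom, assuming only the llvm::APInt API (the DecomposedGEP plumbing itself is not reproduced, and the helper below is illustrative):

    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    using llvm::APInt;

    // Offsets are kept at a width at least as wide as any pointer in the
    // module, so the sum of a struct offset and a scaled-index offset is
    // computed exactly instead of wrapping in 64 bits.
    APInt totalOffset(unsigned MaxPointerSize, int64_t StructOff,
                      int64_t OtherOff) {
      APInt Struct(MaxPointerSize, StructOff, /*isSigned=*/true);
      APInt Other(MaxPointerSize, OtherOff, /*isSigned=*/true);
      return Struct + Other;
    }

    // The raw comparisons turn into explicit predicates, for example:
    //   Offset.sge(0)          instead of   Offset >= 0
    //   Offset.ult(V2Size)     instead of   (uint64_t)Offset < V2Size
    //   (-Offset).ult(V1Size)  instead of   -(uint64_t)Offset < V1Size
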
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 41c295895213..ef27c36517ea 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -252,8 +252,8 @@ void BlockFrequencyInfo::setBlockFreqAndScale(
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
-void BlockFrequencyInfo::view() const {
- ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
+void BlockFrequencyInfo::view(StringRef title) const {
+ ViewGraph(const_cast<BlockFrequencyInfo *>(this), title);
}
const Function *BlockFrequencyInfo::getFunction() const {
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 3d095068e7ff..08ebcc47a807 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -156,9 +156,9 @@ static void combineWeight(Weight &W, const Weight &OtherW) {
static void combineWeightsBySorting(WeightList &Weights) {
// Sort so edges to the same node are adjacent.
- llvm::sort(Weights.begin(), Weights.end(),
- [](const Weight &L,
- const Weight &R) { return L.TargetNode < R.TargetNode; });
+ llvm::sort(Weights, [](const Weight &L, const Weight &R) {
+ return L.TargetNode < R.TargetNode;
+ });
// Combine adjacent edges.
WeightList::iterator O = Weights.begin();
@@ -573,7 +573,9 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
APInt BlockFreq(128, Freq);
APInt EntryFreq(128, getEntryFreq());
BlockCount *= BlockFreq;
- BlockCount = BlockCount.udiv(EntryFreq);
+  // Rounded division of BlockCount by EntryFreq. Since EntryFreq is unsigned,
+  // lshr by 1 gives EntryFreq/2.
+ BlockCount = (BlockCount + EntryFreq.lshr(1)).udiv(EntryFreq);
return BlockCount.getLimitedValue();
}
@@ -705,7 +707,7 @@ static void findIrreducibleHeaders(
"Expected irreducible CFG; -loop-info is likely invalid");
if (Headers.size() == InSCC.size()) {
// Every block is a header.
- llvm::sort(Headers.begin(), Headers.end());
+ llvm::sort(Headers);
return;
}
@@ -740,8 +742,8 @@ static void findIrreducibleHeaders(
Others.push_back(Irr.Node);
LLVM_DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
}
- llvm::sort(Headers.begin(), Headers.end());
- llvm::sort(Others.begin(), Others.end());
+ llvm::sort(Headers);
+ llvm::sort(Others);
}
static void createIrreducibleLoop(
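
The getProfileCountFromFreq hunk above switches from truncating to rounded division by adding EntryFreq/2 (computed with lshr(1)) before the udiv. A minimal sketch of the same idiom on stand-alone APInt values; the 128-bit width mirrors the code above and the sample operands are made up:

    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    using llvm::APInt;

    // Rounded unsigned division: (N + D/2) / D. With N = 10 and D = 4 this
    // returns 3, where a plain udiv would truncate to 2.
    uint64_t roundedDiv(uint64_t N, uint64_t D) {
      APInt Num(128, N);
      APInt Den(128, D);
      return (Num + Den.lshr(1)).udiv(Den).getLimitedValue();
    }
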
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 54a657073f0f..7f544b27fe9d 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -135,7 +135,7 @@ static const uint32_t IH_NONTAKEN_WEIGHT = 1;
/// Add \p BB to PostDominatedByUnreachable set if applicable.
void
BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0) {
if (isa<UnreachableInst>(TI) ||
// If this block is terminated by a call to
@@ -167,7 +167,7 @@ BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
void
BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
assert(!PostDominatedByColdCall.count(BB));
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0)
return;
@@ -202,7 +202,7 @@ BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
/// Predict that a successor which leads necessarily to an
 /// unreachable-terminated block is extremely unlikely.
bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
(void) TI;
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
assert(!isa<InvokeInst>(TI) &&
@@ -246,7 +246,7 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
// heuristic. The probability of the edge coming to unreachable block is
// set to min of metadata and unreachable heuristic.
bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI)))
return false;
@@ -348,7 +348,7 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) {
/// Return true if we could compute the weights for cold edges.
/// Return false, otherwise.
bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
- const TerminatorInst *TI = BB->getTerminator();
+ const Instruction *TI = BB->getTerminator();
(void) TI;
assert(TI->getNumSuccessors() > 1 && "expected more than one successor!");
assert(!isa<InvokeInst>(TI) &&
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index a319be8092f9..aa880a62b754 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -71,7 +71,7 @@ void llvm::FindFunctionBackedges(const Function &F,
/// successor.
unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
const BasicBlock *Succ) {
- const TerminatorInst *Term = BB->getTerminator();
+ const Instruction *Term = BB->getTerminator();
#ifndef NDEBUG
unsigned e = Term->getNumSuccessors();
#endif
@@ -85,8 +85,9 @@ unsigned llvm::GetSuccessorNumber(const BasicBlock *BB,
/// isCriticalEdge - Return true if the specified edge is a critical edge.
/// Critical edges are edges from a block with multiple successors to a block
/// with multiple predecessors.
-bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+bool llvm::isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges) {
+ assert(TI->isTerminator() && "Must be a terminator to have successors!");
assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
if (TI->getNumSuccessors() == 1) return false;
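
CFG.cpp, BranchProbabilityInfo.cpp, CFLGraph.h and DemandedBits.cpp all follow the same pattern here: TerminatorInst disappears from the signatures and a plain Instruction plus isTerminator()/getNumSuccessors() is used instead. A small caller-side sketch of the updated isCriticalEdge interface, assuming only the headers named below; the wrapper function is illustrative:

    #include "llvm/Analysis/CFG.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // getTerminator() is handed straight to isCriticalEdge() as an
    // Instruction*; the callee now asserts isTerminator() itself.
    bool anySuccessorEdgeIsCritical(const BasicBlock *BB) {
      const Instruction *TI = BB->getTerminator();
      for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I)
        if (isCriticalEdge(TI, I, /*AllowIdenticalEdges=*/false))
          return true;
      return false;
    }
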
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 5b170dfa7903..6d01e9d5d447 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a '-dot-cfg' analysis pass, which emits the
-// cfg.<fnname>.dot file for each function in the program, with a graph of the
-// CFG for that function.
+// This file defines a `-dot-cfg` analysis pass, which emits the
+// `<prefix>.<fnname>.dot` file for each function in the program, with a graph
+// of the CFG for that function. The default value for `<prefix>` is `cfg` but
+// can be customized as needed.
//
// The other main feature of this file is that it implements the
// Function::viewCFG method, which is useful for debugging passes which operate
@@ -27,6 +28,10 @@ static cl::opt<std::string> CFGFuncName(
cl::desc("The name of a function (or its substring)"
" whose CFG is viewed/printed."));
+static cl::opt<std::string> CFGDotFilenamePrefix(
+ "cfg-dot-filename-prefix", cl::Hidden,
+ cl::desc("The prefix used for the CFG dot file names."));
+
namespace {
struct CFGViewerLegacyPass : public FunctionPass {
   static char ID; // Pass identification, replacement for typeid
@@ -90,7 +95,8 @@ PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
static void writeCFGToDotFile(Function &F, bool CFGOnly = false) {
if (!CFGFuncName.empty() && !F.getName().contains(CFGFuncName))
return;
- std::string Filename = ("cfg." + F.getName() + ".dot").str();
+ std::string Filename =
+ (CFGDotFilenamePrefix + "." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 194983418b08..1c61dd369a05 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -395,7 +395,7 @@ populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
}
// Sort AliasList for faster lookup
- llvm::sort(AliasList.begin(), AliasList.end());
+ llvm::sort(AliasList);
}
}
@@ -479,7 +479,7 @@ static void populateExternalRelations(
}
// Remove duplicates in ExtRelations
- llvm::sort(ExtRelations.begin(), ExtRelations.end());
+ llvm::sort(ExtRelations);
ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
ExtRelations.end());
}
@@ -515,10 +515,9 @@ CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
return None;
}
-bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
- LocationSize LHSSize,
- const Value *RHS,
- LocationSize RHSSize) const {
+bool CFLAndersAAResult::FunctionInfo::mayAlias(
+ const Value *LHS, LocationSize MaybeLHSSize, const Value *RHS,
+ LocationSize MaybeRHSSize) const {
assert(LHS && RHS);
// Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
@@ -557,11 +556,14 @@ bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
OffsetValue{RHS, 0}, Comparator);
if (RangePair.first != RangePair.second) {
- // Be conservative about UnknownSize
- if (LHSSize == MemoryLocation::UnknownSize ||
- RHSSize == MemoryLocation::UnknownSize)
+ // Be conservative about unknown sizes
+ if (MaybeLHSSize == LocationSize::unknown() ||
+ MaybeRHSSize == LocationSize::unknown())
return true;
+ const uint64_t LHSSize = MaybeLHSSize.getValue();
+ const uint64_t RHSSize = MaybeRHSSize.getValue();
+
for (const auto &OVal : make_range(RangePair)) {
// Be conservative about UnknownOffset
if (OVal.Offset == UnknownOffset)
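
As in BasicAliasAnalysis above, sizes are now checked against LocationSize::unknown() before getValue() is called, since getValue() is only meaningful once the size is known. A minimal sketch of that two-step pattern, assuming llvm/Analysis/MemoryLocation.h; the helper name is illustrative:

    #include "llvm/ADT/Optional.h"
    #include "llvm/Analysis/MemoryLocation.h"
    #include <cstdint>
    #include <utility>
    using namespace llvm;

    // Returns None if either size is unknown; otherwise the two byte counts.
    Optional<std::pair<uint64_t, uint64_t>>
    getPreciseSizes(LocationSize MaybeLHSSize, LocationSize MaybeRHSSize) {
      if (MaybeLHSSize == LocationSize::unknown() ||
          MaybeRHSSize == LocationSize::unknown())
        return None;
      return std::make_pair(MaybeLHSSize.getValue(), MaybeRHSSize.getValue());
    }
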
diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h
index 86812009da7c..12121d717433 100644
--- a/lib/Analysis/CFLGraph.h
+++ b/lib/Analysis/CFLGraph.h
@@ -594,7 +594,7 @@ template <typename CFLAA> class CFLGraphBuilder {
   // Determines whether or not an instruction is useless to us (e.g.
// FenceInst)
static bool hasUsefulEdges(Instruction *Inst) {
- bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
+ bool IsNonInvokeRetTerminator = Inst->isTerminator() &&
!isa<InvokeInst>(Inst) &&
!isa<ReturnInst>(Inst);
return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index b325afb8e7c5..fd2292ced017 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -54,6 +54,11 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM,
LazyCallGraph &G, CGSCCUpdateResult &UR) {
+  // Request PassInstrumentation from the analysis manager; we will use it to
+  // run instrumenting callbacks for the passes later.
+ PassInstrumentation PI =
+ AM.getResult<PassInstrumentationAnalysis>(InitialC, G);
+
PreservedAnalyses PA = PreservedAnalyses::all();
if (DebugLogging)
@@ -67,8 +72,18 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
if (DebugLogging)
dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";
+    // Check the PassInstrumentation's BeforePass callbacks before running
+    // the pass, and skip its execution completely if asked to (callback
+    // returns false).
+ if (!PI.runBeforePass(*Pass, *C))
+ continue;
+
PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C);
+
// Update the SCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
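
The loop above now brackets each pass with instrumentation: a before-callback may skip the pass entirely, and exactly one of the after-callbacks fires depending on whether the SCC survived the pass. The shape of that protocol, reduced to a self-contained sketch in plain C++ (this only illustrates the control flow; it is not the LLVM PassInstrumentation API):

    #include <functional>
    #include <string>
    #include <vector>

    struct Hooks {
      std::function<bool(const std::string &)> Before =
          [](const std::string &) { return true; };
      std::function<void(const std::string &)> After = [](const std::string &) {};
    };

    void runAll(const std::vector<std::string> &PassNames, Hooks &PI) {
      for (const std::string &Name : PassNames) {
        if (!PI.Before(Name))
          continue;       // vetoed: the pass does not run and After is not called
        // ... run the pass here ...
        PI.After(Name);   // runs only for passes that actually executed
      }
    }
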
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 8e8535abd053..c57d8ef69d69 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -30,10 +30,13 @@ add_llvm_library(LLVMAnalysis
DominanceFrontier.cpp
EHPersonalities.cpp
GlobalsModRef.cpp
+ GuardUtils.cpp
+ IVDescriptors.cpp
IVUsers.cpp
IndirectCallPromotionAnalysis.cpp
InlineCost.cpp
InstCount.cpp
+ InstructionPrecedenceTracking.cpp
InstructionSimplify.cpp
Interval.cpp
IntervalPartition.cpp
@@ -42,6 +45,7 @@ add_llvm_library(LLVMAnalysis
LazyBlockFrequencyInfo.cpp
LazyCallGraph.cpp
LazyValueInfo.cpp
+ LegacyDivergenceAnalysis.cpp
Lint.cpp
Loads.cpp
LoopAccessAnalysis.cpp
@@ -64,6 +68,7 @@ add_llvm_library(LLVMAnalysis
ObjCARCInstKind.cpp
OptimizationRemarkEmitter.cpp
OrderedBasicBlock.cpp
+ OrderedInstructions.cpp
PHITransAddr.cpp
PhiValues.cpp
PostDominators.cpp
@@ -76,6 +81,8 @@ add_llvm_library(LLVMAnalysis
ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionExpander.cpp
ScalarEvolutionNormalization.cpp
+ StackSafetyAnalysis.cpp
+ SyncDependenceAnalysis.cpp
SyntheticCountsUtils.cpp
TargetLibraryInfo.cpp
TargetTransformInfo.cpp
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index cbdf5f63c557..0da678e1611b 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -97,8 +97,7 @@ void CallGraph::print(raw_ostream &OS) const {
for (const auto &I : *this)
Nodes.push_back(I.second.get());
- llvm::sort(Nodes.begin(), Nodes.end(),
- [](CallGraphNode *LHS, CallGraphNode *RHS) {
+ llvm::sort(Nodes, [](CallGraphNode *LHS, CallGraphNode *RHS) {
if (Function *LF = LHS->getFunction())
if (Function *RF = RHS->getFunction())
return LF->getName() < RF->getName();
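
This is one of several hunks in the patch that move from the iterator-pair form of llvm::sort to the range form. Both spellings are equivalent; a tiny sketch assuming llvm/ADT/STLExtras.h, where llvm::sort lives:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    void sortBothWays(llvm::SmallVector<int, 8> &V) {
      // Old spelling: explicit iterator pair.
      llvm::sort(V.begin(), V.end(), [](int L, int R) { return L < R; });
      // New spelling: pass the container; the iterators are derived for you.
      llvm::sort(V, [](int L, int R) { return L < R; });
    }
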
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 4c33c420b65d..0aed57a39387 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -22,11 +22,13 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -123,24 +125,34 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
Module &M = CG.getModule();
if (!PM) {
- CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
+ CallGraphSCCPass *CGSP = (CallGraphSCCPass *)P;
if (!CallGraphUpToDate) {
DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
CallGraphUpToDate = true;
}
{
- unsigned InstrCount = 0;
+ unsigned InstrCount, SCCCount = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
TimeRegion PassTimer(getPassTimer(CGSP));
if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
Changed = CGSP->runOnSCC(CurSCC);
- // If the pass modified the module, it may have modified the instruction
- // count of the module. Try emitting a remark.
- if (EmitICRemark)
- emitInstrCountChangedRemark(P, M, InstrCount);
+ if (EmitICRemark) {
+ // FIXME: Add getInstructionCount to CallGraphSCC.
+ SCCCount = M.getInstructionCount();
+ // Is there a difference in the number of instructions in the module?
+ if (SCCCount != InstrCount) {
+ // Yep. Emit a remark and update InstrCount.
+ int64_t Delta =
+ static_cast<int64_t>(SCCCount) - static_cast<int64_t>(InstrCount);
+ emitInstrCountChangedRemark(P, M, Delta, InstrCount,
+ FunctionToInstrCount);
+ InstrCount = SCCCount;
+ }
+ }
}
// After the CGSCCPass is done, when assertions are enabled, use
@@ -621,23 +633,40 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override {
bool BannerPrinted = false;
- auto PrintBannerOnce = [&] () {
+ auto PrintBannerOnce = [&]() {
if (BannerPrinted)
return;
OS << Banner;
BannerPrinted = true;
- };
+ };
+
+ bool NeedModule = llvm::forcePrintModuleIR();
+ if (isFunctionInPrintList("*") && NeedModule) {
+ PrintBannerOnce();
+ OS << "\n";
+ SCC.getCallGraph().getModule().print(OS, nullptr);
+ return false;
+ }
+ bool FoundFunction = false;
for (CallGraphNode *CGN : SCC) {
if (Function *F = CGN->getFunction()) {
if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) {
- PrintBannerOnce();
- F->print(OS);
+ FoundFunction = true;
+ if (!NeedModule) {
+ PrintBannerOnce();
+ F->print(OS);
+ }
}
} else if (isFunctionInPrintList("*")) {
PrintBannerOnce();
OS << "\nPrinting <null> Function\n";
}
}
+ if (NeedModule && FoundFunction) {
+ PrintBannerOnce();
+ OS << "\n";
+ SCC.getCallGraph().getModule().print(OS, nullptr);
+ }
return false;
}
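
The size-remark change above subtracts two unsigned instruction counts; both are cast to int64_t first so that a shrinking module produces a negative delta instead of wrapping. A short worked sketch of why the casts matter:

    #include <cstdint>

    // With Before = 100 and After = 90 this returns -10. Subtracting the raw
    // 32-bit unsigned values first would wrap to 4294967286 before any
    // conversion to a signed type.
    int64_t instrCountDelta(unsigned Before, unsigned After) {
      return static_cast<int64_t>(After) - static_cast<int64_t>(Before);
    }
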
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index d4f73bdb4361..669f4f2835fa 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -23,7 +23,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -158,7 +157,8 @@ namespace {
/// storing the value (or part of it) into memory anywhere automatically
/// counts as capturing it or not.
bool llvm::PointerMayBeCaptured(const Value *V,
- bool ReturnCaptures, bool StoreCaptures) {
+ bool ReturnCaptures, bool StoreCaptures,
+ unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
@@ -169,7 +169,7 @@ bool llvm::PointerMayBeCaptured(const Value *V,
(void)StoreCaptures;
SimpleCaptureTracker SCT(ReturnCaptures);
- PointerMayBeCaptured(V, &SCT);
+ PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
return SCT.Captured;
}
@@ -186,13 +186,15 @@ bool llvm::PointerMayBeCaptured(const Value *V,
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
const DominatorTree *DT, bool IncludeI,
- OrderedBasicBlock *OBB) {
+ OrderedBasicBlock *OBB,
+ unsigned MaxUsesToExplore) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
bool UseNewOBB = OBB == nullptr;
if (!DT)
- return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures);
+ return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures,
+ MaxUsesToExplore);
if (UseNewOBB)
OBB = new OrderedBasicBlock(I->getParent());
@@ -200,29 +202,25 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
// with StoreCaptures.
CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB);
- PointerMayBeCaptured(V, &CB);
+ PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
if (UseNewOBB)
delete OBB;
return CB.Captured;
}
-/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
-/// a cache. Then we can move the code from BasicAliasAnalysis into
-/// that path, and remove this threshold.
-static int const Threshold = 20;
-
-void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
+void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
+ unsigned MaxUsesToExplore) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
- SmallVector<const Use *, Threshold> Worklist;
- SmallSet<const Use *, Threshold> Visited;
+ SmallVector<const Use *, DefaultMaxUsesToExplore> Worklist;
+ SmallSet<const Use *, DefaultMaxUsesToExplore> Visited;
auto AddUses = [&](const Value *V) {
- int Count = 0;
+ unsigned Count = 0;
for (const Use &U : V->uses()) {
// If there are lots of uses, conservatively say that the value
// is captured to avoid taking too much compile time.
- if (Count++ >= Threshold)
+ if (Count++ >= MaxUsesToExplore)
return Tracker->tooManyUses();
if (!Visited.insert(&U).second)
continue;
@@ -241,11 +239,12 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
switch (I->getOpcode()) {
case Instruction::Call:
case Instruction::Invoke: {
- CallSite CS(I);
+ auto *Call = cast<CallBase>(I);
// Not captured if the callee is readonly, doesn't return a copy through
// its return value and doesn't unwind (a readonly function can leak bits
// by throwing an exception or not depending on the input value).
- if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy())
+ if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
+ Call->getType()->isVoidTy())
break;
// The pointer is not captured if returned pointer is not captured.
@@ -253,14 +252,14 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
// marked with nocapture do not capture. This means that places like
// GetUnderlyingObject in ValueTracking or DecomposeGEPExpression
// in BasicAA also need to know about this property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS)) {
- AddUses(I);
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call)) {
+ AddUses(Call);
break;
}
// Volatile operations effectively capture the memory location that they
// load and store to.
- if (auto *MI = dyn_cast<MemIntrinsic>(I))
+ if (auto *MI = dyn_cast<MemIntrinsic>(Call))
if (MI->isVolatile())
if (Tracker->captured(U))
return;
@@ -272,13 +271,14 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
// that loading a value from a pointer does not cause the pointer to be
// captured, even though the loaded value might be the pointer itself
// (think of self-referential objects).
- CallSite::data_operand_iterator B =
- CS.data_operands_begin(), E = CS.data_operands_end();
- for (CallSite::data_operand_iterator A = B; A != E; ++A)
- if (A->get() == V && !CS.doesNotCapture(A - B))
+ for (auto IdxOpPair : enumerate(Call->data_ops())) {
+ int Idx = IdxOpPair.index();
+ Value *A = IdxOpPair.value();
+ if (A == V && !Call->doesNotCapture(Idx))
// The parameter is not marked 'nocapture' - captured.
if (Tracker->captured(U))
return;
+ }
break;
}
case Instruction::Load:
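
The fixed file-local Threshold of 20 is gone; callers of PointerMayBeCaptured can now pass their own MaxUsesToExplore budget (the default presumably remains DefaultMaxUsesToExplore in the header, which is not shown here). A hedged usage sketch with an arbitrary budget of 32; the wrapper name is illustrative, and V must not be a GlobalValue, which the callee asserts:

    #include "llvm/Analysis/CaptureTracking.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;

    // Conservatively reports "captured" once more than 32 uses are walked.
    bool mayEscapeWithBudget(const Value *V) {
      return PointerMayBeCaptured(V, /*ReturnCaptures=*/true,
                                  /*StoreCaptures=*/true,
                                  /*MaxUsesToExplore=*/32);
    }
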
diff --git a/lib/Analysis/CmpInstAnalysis.cpp b/lib/Analysis/CmpInstAnalysis.cpp
index 159c1a2d135a..27071babec5c 100644
--- a/lib/Analysis/CmpInstAnalysis.cpp
+++ b/lib/Analysis/CmpInstAnalysis.cpp
@@ -40,28 +40,28 @@ unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
}
}
-Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
- CmpInst::Predicate &NewICmpPred) {
+Constant *llvm::getPredForICmpCode(unsigned Code, bool Sign, Type *OpTy,
+ CmpInst::Predicate &Pred) {
switch (Code) {
default: llvm_unreachable("Illegal ICmp code!");
case 0: // False.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
- case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
- case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
- case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
- case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
- case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 0);
+ case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: Pred = ICmpInst::ICMP_EQ; break;
+ case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: Pred = ICmpInst::ICMP_NE; break;
+ case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
case 7: // True.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ return ConstantInt::get(CmpInst::makeCmpResultType(OpTy), 1);
}
return nullptr;
}
-bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
- return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
- (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
- (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+bool llvm::predicatesFoldable(ICmpInst::Predicate P1, ICmpInst::Predicate P2) {
+ return (CmpInst::isSigned(P1) == CmpInst::isSigned(P2)) ||
+ (CmpInst::isSigned(P1) && ICmpInst::isEquality(P2)) ||
+ (CmpInst::isSigned(P2) && ICmpInst::isEquality(P1));
}
bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS,
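
getPredForICmpCode keeps the same 3-bit encoding that getICmpCode produces: bit 0 means "greater", bit 1 means "equal", bit 2 means "less", so ORing two codes yields the code of their disjunction. A small sketch under the assumption that the matching declaration lives in llvm/Analysis/CmpInstAnalysis.h (only the definition is shown above); the helper function is illustrative:

    #include "llvm/Analysis/CmpInstAnalysis.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // SGT has code 1 (bit 0) and EQ has code 2 (bit 1); 1 | 2 == 3, which the
    // table above decodes as SGE for signed operands of type OpTy.
    CmpInst::Predicate orSignedGtWithEq(Type *OpTy) {
      CmpInst::Predicate Pred;
      Constant *C = getPredForICmpCode(/*Code=*/1 | 2, /*Sign=*/true, OpTy, Pred);
      (void)C;     // null here: code 3 maps to a predicate, not true/false
      return Pred; // ICmpInst::ICMP_SGE
    }
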
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index c5281c57bc19..5da29d6d2372 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -347,9 +347,20 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
// We're simulating a load through a pointer that was bitcast to point to
// a different type, so we can try to walk down through the initial
- // elements of an aggregate to see if some part of th e aggregate is
+ // elements of an aggregate to see if some part of the aggregate is
// castable to implement the "load" semantic model.
- C = C->getAggregateElement(0u);
+ if (SrcTy->isStructTy()) {
+ // Struct types might have leading zero-length elements like [0 x i32],
+ // which are certainly not what we are looking for, so skip them.
+ unsigned Elem = 0;
+ Constant *ElemC;
+ do {
+ ElemC = C->getAggregateElement(Elem++);
+ } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()) == 0);
+ C = ElemC;
+ } else {
+ C = C->getAggregateElement(0u);
+ }
} while (C);
return nullptr;
@@ -960,10 +971,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
NewIdxs.size() > *LastIRIndex) {
InRangeIndex = LastIRIndex;
for (unsigned I = 0; I <= *LastIRIndex; ++I)
- if (NewIdxs[I] != InnermostGEP->getOperand(I + 1)) {
- InRangeIndex = None;
- break;
- }
+ if (NewIdxs[I] != InnermostGEP->getOperand(I + 1))
+ return nullptr;
}
// Create a GEP.
@@ -985,11 +994,6 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
/// returned, if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
-///
-/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/inrange
-/// etc information, due to only being passed an opcode and operands. Constant
-/// folding using this function strips this information.
-///
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
@@ -1370,6 +1374,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
case Intrinsic::log:
case Intrinsic::log2:
case Intrinsic::log10:
@@ -1389,6 +1395,8 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::copysign:
@@ -1402,6 +1410,10 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::usub_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::usub_sat:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
case Intrinsic::bitreverse:
@@ -1413,6 +1425,23 @@ bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) {
case Intrinsic::x86_sse2_cvtsd2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64:
+ case Intrinsic::is_constant:
return true;
default:
return false;
@@ -1553,7 +1582,7 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V,
/// result. Returns null if the conversion cannot be performed, otherwise
/// returns the Constant value resulting from the conversion.
Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
- Type *Ty) {
+ Type *Ty, bool IsSigned) {
// All of these conversion intrinsics form an integer of at most 64bits.
unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
@@ -1565,11 +1594,11 @@ Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
: APFloat::rmNearestTiesToEven;
APFloat::opStatus status =
Val.convertToInteger(makeMutableArrayRef(UIntVal), ResultWidth,
- /*isSigned=*/true, mode, &isExact);
+ IsSigned, mode, &isExact);
if (status != APFloat::opOK &&
(!roundTowardZero || status != APFloat::opInexact))
return nullptr;
- return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
+ return ConstantInt::get(Ty, UIntVal, IsSigned);
}
double getValueAsDouble(ConstantFP *Op) {
@@ -1587,14 +1616,49 @@ double getValueAsDouble(ConstantFP *Op) {
return APF.convertToDouble();
}
+static bool isManifestConstant(const Constant *c) {
+ if (isa<ConstantData>(c)) {
+ return true;
+ } else if (isa<ConstantAggregate>(c) || isa<ConstantExpr>(c)) {
+ for (const Value *subc : c->operand_values()) {
+ if (!isManifestConstant(cast<Constant>(subc)))
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
+ if (auto *CI = dyn_cast<ConstantInt>(Op)) {
+ C = &CI->getValue();
+ return true;
+ }
+ if (isa<UndefValue>(Op)) {
+ C = nullptr;
+ return true;
+ }
+ return false;
+}
+
Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI,
ImmutableCallSite CS) {
if (Operands.size() == 1) {
+ if (IntrinsicID == Intrinsic::is_constant) {
+ // We know we have a "Constant" argument. But we want to only
+ // return true for manifest constants, not those that depend on
+ // constants with unknowable values, e.g. GlobalValue or BlockAddress.
+ if (isManifestConstant(Operands[0]))
+ return ConstantInt::getTrue(Ty->getContext());
+ return nullptr;
+ }
if (isa<UndefValue>(Operands[0])) {
- // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN
- if (IntrinsicID == Intrinsic::cos)
+ // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
+ // ctpop() is between 0 and bitwidth, pick 0 for undef.
+ if (IntrinsicID == Intrinsic::cos ||
+ IntrinsicID == Intrinsic::ctpop)
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::bswap ||
IntrinsicID == Intrinsic::bitreverse ||
@@ -1849,7 +1913,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/false, Ty);
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
break;
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
@@ -1858,7 +1923,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
- /*roundTowardZero=*/true, Ty);
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
break;
}
}
@@ -1899,6 +1965,18 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
}
+ if (IntrinsicID == Intrinsic::minimum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minimum(C1, C2));
+ }
+
+ if (IntrinsicID == Intrinsic::maximum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maximum(C1, C2));
+ }
+
if (!TLI)
return nullptr;
if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
@@ -1931,58 +2009,149 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return nullptr;
}
- if (auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
- if (auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+ if (Operands[0]->getType()->isIntegerTy() &&
+ Operands[1]->getType()->isIntegerTy()) {
+ const APInt *C0, *C1;
+ if (!getConstIntOrUndef(Operands[0], C0) ||
+ !getConstIntOrUndef(Operands[1], C1))
+ return nullptr;
+
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // Even if both operands are undef, we cannot fold muls to undef
+ // in the general case. For example, on i2 there are no inputs
+ // that would produce { i2 -1, i1 true } as the result.
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow: {
+ if (!C0 || !C1)
+ return UndefValue::get(Ty);
+
+ APInt Res;
+ bool Overflow;
switch (IntrinsicID) {
- default: break;
+ default: llvm_unreachable("Invalid case");
case Intrinsic::sadd_with_overflow:
+ Res = C0->sadd_ov(*C1, Overflow);
+ break;
case Intrinsic::uadd_with_overflow:
+ Res = C0->uadd_ov(*C1, Overflow);
+ break;
case Intrinsic::ssub_with_overflow:
+ Res = C0->ssub_ov(*C1, Overflow);
+ break;
case Intrinsic::usub_with_overflow:
+ Res = C0->usub_ov(*C1, Overflow);
+ break;
case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow: {
- APInt Res;
- bool Overflow;
- switch (IntrinsicID) {
- default: llvm_unreachable("Invalid case");
- case Intrinsic::sadd_with_overflow:
- Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::uadd_with_overflow:
- Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::ssub_with_overflow:
- Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::usub_with_overflow:
- Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::smul_with_overflow:
- Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
- break;
- case Intrinsic::umul_with_overflow:
- Res = Op1->getValue().umul_ov(Op2->getValue(), Overflow);
- break;
- }
- Constant *Ops[] = {
- ConstantInt::get(Ty->getContext(), Res),
- ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
- };
- return ConstantStruct::get(cast<StructType>(Ty), Ops);
- }
- case Intrinsic::cttz:
- if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef.
- return UndefValue::get(Ty);
- return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
- case Intrinsic::ctlz:
- if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef.
- return UndefValue::get(Ty);
- return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
+ Res = C0->smul_ov(*C1, Overflow);
+ break;
+ case Intrinsic::umul_with_overflow:
+ Res = C0->umul_ov(*C1, Overflow);
+ break;
}
+ Constant *Ops[] = {
+ ConstantInt::get(Ty->getContext(), Res),
+ ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
+ };
+ return ConstantStruct::get(cast<StructType>(Ty), Ops);
+ }
+ case Intrinsic::uadd_sat:
+ case Intrinsic::sadd_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getAllOnesValue(Ty);
+ if (IntrinsicID == Intrinsic::uadd_sat)
+ return ConstantInt::get(Ty, C0->uadd_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->sadd_sat(*C1));
+ case Intrinsic::usub_sat:
+ case Intrinsic::ssub_sat:
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+ if (!C0 || !C1)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::usub_sat)
+ return ConstantInt::get(Ty, C0->usub_sat(*C1));
+ else
+ return ConstantInt::get(Ty, C0->ssub_sat(*C1));
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ assert(C1 && "Must be constant int");
+
+ // cttz(0, 1) and ctlz(0, 1) are undef.
+ if (C1->isOneValue() && (!C0 || C0->isNullValue()))
+ return UndefValue::get(Ty);
+ if (!C0)
+ return Constant::getNullValue(Ty);
+ if (IntrinsicID == Intrinsic::cttz)
+ return ConstantInt::get(Ty, C0->countTrailingZeros());
+ else
+ return ConstantInt::get(Ty, C0->countLeadingZeros());
}
return nullptr;
}
+
+ // Support ConstantVector in case we have an Undef in the top.
+ if ((isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) &&
+ // Check for default rounding mode.
+ // FIXME: Support other rounding modes?
+ isa<ConstantInt>(Operands[1]) &&
+ cast<ConstantInt>(Operands[1])->getValue() == 4) {
+ auto *Op = cast<Constant>(Operands[0]);
+ switch (IntrinsicID) {
+ default: break;
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty,
+ /*IsSigned*/false);
+ break;
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/true);
+ break;
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64:
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty,
+ /*IsSigned*/false);
+ break;
+ }
+ }
return nullptr;
}
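
The undef rules above are deliberately asymmetric: for uadd.sat/sadd.sat a single undef operand folds to all-ones (undef may be chosen as whatever saturates), for usub.sat/ssub.sat it folds to zero, and a fully-undef operand pair folds to undef. When both operands are real constants the fold is plain saturating arithmetic, as in this small i8 sketch (the values are arbitrary):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    void saturatingExamples() {
      APInt A(8, 200), B(8, 100);
      APInt AddSat = A.uadd_sat(B); // 200 + 100 saturates to 255
      APInt SubSat = B.usub_sat(A); // 100 - 200 saturates to 0
      (void)AddSat;
      (void)SubSat;
    }
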
@@ -2010,6 +2179,36 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
}
}
+ if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
+ const APInt *C0, *C1, *C2;
+ if (!getConstIntOrUndef(Operands[0], C0) ||
+ !getConstIntOrUndef(Operands[1], C1) ||
+ !getConstIntOrUndef(Operands[2], C2))
+ return nullptr;
+
+ bool IsRight = IntrinsicID == Intrinsic::fshr;
+ if (!C2)
+ return Operands[IsRight ? 1 : 0];
+ if (!C0 && !C1)
+ return UndefValue::get(Ty);
+
+ // The shift amount is interpreted as modulo the bitwidth. If the shift
+ // amount is effectively 0, avoid UB due to oversized inverse shift below.
+ unsigned BitWidth = C2->getBitWidth();
+ unsigned ShAmt = C2->urem(BitWidth);
+ if (!ShAmt)
+ return Operands[IsRight ? 1 : 0];
+
+ // (C0 << ShlAmt) | (C1 >> LshrAmt)
+ unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
+ unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
+ if (!C0)
+ return ConstantInt::get(Ty, C1->lshr(LshrAmt));
+ if (!C1)
+ return ConstantInt::get(Ty, C0->shl(ShlAmt));
+ return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
+ }
+
return nullptr;
}
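
The fshl/fshr fold above reduces the shift amount modulo the bit width, returns an operand unchanged when the effective shift is zero, and otherwise combines the halves as (C0 << ShlAmt) | (C1 >> LshrAmt). A worked i8 example of the same computation on APInt values (the constants are arbitrary):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // fshl(i8 0xAB, i8 0xCD, 4): the funnel shifts the concatenation left by
    // 4, so the result is (0xAB << 4) | (0xCD >> 4) = 0xB0 | 0x0C = 0xBC.
    APInt foldFshlExample() {
      APInt C0(8, 0xAB), C1(8, 0xCD), C2(8, 4);
      unsigned BitWidth = C2.getBitWidth();
      unsigned ShAmt = C2.urem(BitWidth);  // 4; zero is handled early above
      unsigned ShlAmt = ShAmt;             // fshl: C0 supplies the high part
      unsigned LshrAmt = BitWidth - ShAmt; // C1 supplies the low part
      return C0.shl(ShlAmt) | C1.lshr(LshrAmt);
    }
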
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index e7637cd88327..34f785fb02be 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -21,8 +21,7 @@
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -39,6 +38,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/Pass.h"
@@ -50,6 +50,7 @@
#include <cstdint>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "demanded-bits"
@@ -78,13 +79,14 @@ void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
}
static bool isAlwaysLive(Instruction *I) {
- return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
- I->isEHPad() || I->mayHaveSideEffects();
+ return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
+ I->mayHaveSideEffects();
}
void DemandedBits::determineLiveOperandBits(
- const Instruction *UserI, const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2) {
+ const Instruction *UserI, const Value *Val, unsigned OperandNo,
+ const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2,
+ bool &KnownBitsComputed) {
unsigned BitWidth = AB.getBitWidth();
// We're called once per operand, but for some instructions, we need to
@@ -95,7 +97,11 @@ void DemandedBits::determineLiveOperandBits(
// provided here.
auto ComputeKnownBits =
[&](unsigned BitWidth, const Value *V1, const Value *V2) {
- const DataLayout &DL = I->getModule()->getDataLayout();
+ if (KnownBitsComputed)
+ return;
+ KnownBitsComputed = true;
+
+ const DataLayout &DL = UserI->getModule()->getDataLayout();
Known = KnownBits(BitWidth);
computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT);
@@ -127,7 +133,7 @@ void DemandedBits::determineLiveOperandBits(
// We need some output bits, so we need all bits of the
// input to the left of, and including, the leftmost bit
// known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
+ ComputeKnownBits(BitWidth, Val, nullptr);
AB = APInt::getHighBitsSet(BitWidth,
std::min(BitWidth, Known.countMaxLeadingZeros()+1));
}
@@ -137,11 +143,33 @@ void DemandedBits::determineLiveOperandBits(
// We need some output bits, so we need all bits of the
// input to the right of, and including, the rightmost bit
// known to be one.
- ComputeKnownBits(BitWidth, I, nullptr);
+ ComputeKnownBits(BitWidth, Val, nullptr);
AB = APInt::getLowBitsSet(BitWidth,
std::min(BitWidth, Known.countMaxTrailingZeros()+1));
}
break;
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ const APInt *SA;
+ if (OperandNo == 2) {
+ // Shift amount is modulo the bitwidth. For powers of two we have
+ // SA % BW == SA & (BW - 1).
+ if (isPowerOf2_32(BitWidth))
+ AB = BitWidth - 1;
+ } else if (match(II->getOperand(2), m_APInt(SA))) {
+ // Normalize to funnel shift left. APInt shifts of BitWidth are well-
+ // defined, so no need to special-case zero shifts here.
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
+
+ if (OperandNo == 0)
+ AB = AOut.lshr(ShiftAmt);
+ else if (OperandNo == 1)
+ AB = AOut.shl(BitWidth - ShiftAmt);
+ }
+ break;
+ }
}
break;
case Instruction::Add:
@@ -153,8 +181,9 @@ void DemandedBits::determineLiveOperandBits(
AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
break;
case Instruction::Shl:
- if (OperandNo == 0)
- if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ if (OperandNo == 0) {
+ const APInt *ShiftAmtC;
+ if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.lshr(ShiftAmt);
@@ -166,10 +195,12 @@ void DemandedBits::determineLiveOperandBits(
else if (S->hasNoUnsignedWrap())
AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
}
+ }
break;
case Instruction::LShr:
- if (OperandNo == 0)
- if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ if (OperandNo == 0) {
+ const APInt *ShiftAmtC;
+ if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
@@ -178,10 +209,12 @@ void DemandedBits::determineLiveOperandBits(
if (cast<LShrOperator>(UserI)->isExact())
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
}
+ }
break;
case Instruction::AShr:
- if (OperandNo == 0)
- if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+ if (OperandNo == 0) {
+ const APInt *ShiftAmtC;
+ if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
AB = AOut.shl(ShiftAmt);
// Because the high input bit is replicated into the
@@ -196,6 +229,7 @@ void DemandedBits::determineLiveOperandBits(
if (cast<AShrOperator>(UserI)->isExact())
AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
}
+ }
break;
case Instruction::And:
AB = AOut;
@@ -204,14 +238,11 @@ void DemandedBits::determineLiveOperandBits(
// other operand are dead (unless they're both zero, in which
// case they can't both be dead, so just mark the LHS bits as
// dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
+ if (OperandNo == 0)
AB &= ~Known2.Zero;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ else
AB &= ~(Known.Zero & ~Known2.Zero);
- }
break;
case Instruction::Or:
AB = AOut;
@@ -220,14 +251,11 @@ void DemandedBits::determineLiveOperandBits(
// other operand are dead (unless they're both one, in which
// case they can't both be dead, so just mark the LHS bits as
// dead).
- if (OperandNo == 0) {
- ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+ ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
+ if (OperandNo == 0)
AB &= ~Known2.One;
- } else {
- if (!isa<Instruction>(UserI->getOperand(0)))
- ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+ else
AB &= ~(Known.One & ~Known2.One);
- }
break;
case Instruction::Xor:
case Instruction::PHI:
@@ -253,6 +281,15 @@ void DemandedBits::determineLiveOperandBits(
if (OperandNo != 0)
AB = AOut;
break;
+ case Instruction::ExtractElement:
+ if (OperandNo == 0)
+ AB = AOut;
+ break;
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ if (OperandNo == 0 || OperandNo == 1)
+ AB = AOut;
+ break;
}
}
@@ -275,8 +312,9 @@ void DemandedBits::performAnalysis() {
Visited.clear();
AliveBits.clear();
+ DeadUses.clear();
- SmallVector<Instruction*, 128> Worklist;
+ SmallSetVector<Instruction*, 16> Worklist;
// Collect the set of "root" instructions that are known live.
for (Instruction &I : instructions(F)) {
@@ -288,9 +326,10 @@ void DemandedBits::performAnalysis() {
// bits and add the instruction to the work list. For other instructions
// add their operands to the work list (for integer values operands, mark
// all bits as live).
- if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second)
- Worklist.push_back(&I);
+ Type *T = I.getType();
+ if (T->isIntOrIntVectorTy()) {
+ if (AliveBits.try_emplace(&I, T->getScalarSizeInBits(), 0).second)
+ Worklist.insert(&I);
continue;
}
@@ -298,9 +337,10 @@ void DemandedBits::performAnalysis() {
// Non-integer-typed instructions...
for (Use &OI : I.operands()) {
if (Instruction *J = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
- AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
- Worklist.push_back(J);
+ Type *T = J->getType();
+ if (T->isIntOrIntVectorTy())
+ AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
+ Worklist.insert(J);
}
}
// To save memory, we don't add I to the Visited set here. Instead, we
@@ -315,35 +355,51 @@ void DemandedBits::performAnalysis() {
LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
APInt AOut;
- if (UserI->getType()->isIntegerTy()) {
+ if (UserI->getType()->isIntOrIntVectorTy()) {
AOut = AliveBits[UserI];
- LLVM_DEBUG(dbgs() << " Alive Out: " << AOut);
+ LLVM_DEBUG(dbgs() << " Alive Out: 0x"
+ << Twine::utohexstr(AOut.getLimitedValue()));
}
LLVM_DEBUG(dbgs() << "\n");
- if (!UserI->getType()->isIntegerTy())
+ if (!UserI->getType()->isIntOrIntVectorTy())
Visited.insert(UserI);
KnownBits Known, Known2;
+ bool KnownBitsComputed = false;
// Compute the set of alive bits for each operand. These are anded into the
// existing set, if any, and if that changes the set of alive bits, the
// operand is added to the work-list.
for (Use &OI : UserI->operands()) {
- if (Instruction *I = dyn_cast<Instruction>(OI)) {
- if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
- unsigned BitWidth = IT->getBitWidth();
- APInt AB = APInt::getAllOnesValue(BitWidth);
- if (UserI->getType()->isIntegerTy() && !AOut &&
- !isAlwaysLive(UserI)) {
- AB = APInt(BitWidth, 0);
- } else {
- // If all bits of the output are dead, then all bits of the input
- // Bits of each operand that are used to compute alive bits of the
- // output are alive, all others are dead.
- determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
- Known, Known2);
- }
+ // We also want to detect dead uses of arguments, but will only store
+ // demanded bits for instructions.
+ Instruction *I = dyn_cast<Instruction>(OI);
+ if (!I && !isa<Argument>(OI))
+ continue;
+
+ Type *T = OI->getType();
+ if (T->isIntOrIntVectorTy()) {
+ unsigned BitWidth = T->getScalarSizeInBits();
+ APInt AB = APInt::getAllOnesValue(BitWidth);
+ if (UserI->getType()->isIntOrIntVectorTy() && !AOut &&
+ !isAlwaysLive(UserI)) {
+ // If all bits of the output are dead, then all bits of the input
+ // are also dead.
+ AB = APInt(BitWidth, 0);
+ } else {
+ // Bits of each operand that are used to compute alive bits of the
+ // output are alive, all others are dead.
+ determineLiveOperandBits(UserI, OI, OI.getOperandNo(), AOut, AB,
+ Known, Known2, KnownBitsComputed);
+
+ // Keep track of uses which have no demanded bits.
+ if (AB.isNullValue())
+ DeadUses.insert(&OI);
+ else
+ DeadUses.erase(&OI);
+ }
+ if (I) {
// If we've added to the set of alive bits (or the operand has not
// been previously visited), then re-queue the operand to be visited
// again.
@@ -355,11 +411,11 @@ void DemandedBits::performAnalysis() {
APInt ABNew = AB | ABPrev;
if (ABNew != ABPrev || ABI == AliveBits.end()) {
AliveBits[I] = std::move(ABNew);
- Worklist.push_back(I);
+ Worklist.insert(I);
}
- } else if (!Visited.count(I)) {
- Worklist.push_back(I);
}
+ } else if (I && !Visited.count(I)) {
+ Worklist.insert(I);
}
}
}
@@ -368,11 +424,13 @@ void DemandedBits::performAnalysis() {
APInt DemandedBits::getDemandedBits(Instruction *I) {
performAnalysis();
- const DataLayout &DL = I->getModule()->getDataLayout();
auto Found = AliveBits.find(I);
if (Found != AliveBits.end())
return Found->second;
- return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
+
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ return APInt::getAllOnesValue(
+ DL.getTypeSizeInBits(I->getType()->getScalarType()));
}
bool DemandedBits::isInstructionDead(Instruction *I) {
@@ -382,6 +440,31 @@ bool DemandedBits::isInstructionDead(Instruction *I) {
!isAlwaysLive(I);
}
+bool DemandedBits::isUseDead(Use *U) {
+ // We only track integer uses, everything else is assumed live.
+ if (!(*U)->getType()->isIntOrIntVectorTy())
+ return false;
+
+ // Uses by always-live instructions are never dead.
+ Instruction *UserI = cast<Instruction>(U->getUser());
+ if (isAlwaysLive(UserI))
+ return false;
+
+ performAnalysis();
+ if (DeadUses.count(U))
+ return true;
+
+ // If no output bits are demanded, no input bits are demanded and the use
+ // is dead. These uses might not be explicitly present in the DeadUses map.
+ if (UserI->getType()->isIntOrIntVectorTy()) {
+ auto Found = AliveBits.find(UserI);
+ if (Found != AliveBits.end() && Found->second.isNullValue())
+ return true;
+ }
+
+ return false;
+}
+
void DemandedBits::print(raw_ostream &OS) {
performAnalysis();
for (auto &KV : AliveBits) {
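The fshl/fshr handling added to determineLiveOperandBits above maps demanded bits of the result back onto the two data operands. A small sketch of that mapping with made-up values (not taken from the patch):

#include "llvm/ADT/APInt.h"
#include <cassert>
using llvm::APInt;

// For fshl(Op0, Op1, 8) on i32, result bit i comes from Op0 bit (i - 8) when
// i >= 8 and from Op1 bit (i + 24) when i < 8. If only the low 16 result bits
// are demanded (AOut = 0x0000FFFF), the operand masks computed above are:
void fshlDemandedOperandBits() {
  unsigned BitWidth = 32, ShiftAmt = 8;
  APInt AOut(BitWidth, 0x0000FFFF);
  APInt DemandedOp0 = AOut.lshr(ShiftAmt);           // 0x000000FF
  APInt DemandedOp1 = AOut.shl(BitWidth - ShiftAmt); // 0xFF000000
  assert(DemandedOp0 == 0x000000FFu && DemandedOp1 == 0xFF000000u);
}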
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 79c2728d5620..3f4dfa52e1da 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -194,6 +194,13 @@ void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
dumpExampleDependence(OS, info.get());
}
+PreservedAnalyses
+DependenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
+ OS << "'Dependence Analysis' for function '" << F.getName() << "':\n";
+ dumpExampleDependence(OS, &FAM.getResult<DependenceAnalysis>(F));
+ return PreservedAnalyses::all();
+}
+
//===----------------------------------------------------------------------===//
// Dependence methods
@@ -633,8 +640,8 @@ static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
const MemoryLocation &LocB) {
// Check the original locations (minus size) for noalias, which can happen for
// tbaa, incompatible underlying object locations, etc.
- MemoryLocation LocAS(LocA.Ptr, MemoryLocation::UnknownSize, LocA.AATags);
- MemoryLocation LocBS(LocB.Ptr, MemoryLocation::UnknownSize, LocB.AATags);
+ MemoryLocation LocAS(LocA.Ptr, LocationSize::unknown(), LocA.AATags);
+ MemoryLocation LocBS(LocB.Ptr, LocationSize::unknown(), LocB.AATags);
if (AA->alias(LocAS, LocBS) == NoAlias)
return NoAlias;
diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp
index f5f1874c9303..7ba23854a3cc 100644
--- a/lib/Analysis/DivergenceAnalysis.cpp
+++ b/lib/Analysis/DivergenceAnalysis.cpp
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements divergence analysis which determines whether a branch
-// in a GPU program is divergent.It can help branch optimizations such as jump
+// This file implements a general divergence analysis for loop vectorization
+// and GPU programs. It determines which branches and values in a loop or GPU
+// program are divergent. It can help branch optimizations such as jump
// threading and loop unswitching to make better decisions.
//
// GPU programs typically use the SIMD execution model, where multiple threads
@@ -16,25 +17,29 @@
// code contains divergent branches (i.e., threads in a group do not agree on
// which path of the branch to take), the group of threads has to execute all
// the paths from that branch with different subsets of threads enabled until
-// they converge at the immediately post-dominating BB of the paths.
+// they re-converge.
//
// Due to this execution model, some optimizations such as jump
-// threading and loop unswitching can be unfortunately harmful when performed on
-// divergent branches. Therefore, an analysis that computes which branches in a
-// GPU program are divergent can help the compiler to selectively run these
-// optimizations.
+// threading and loop unswitching can interfere with thread re-convergence.
+// Therefore, an analysis that computes which branches in a GPU program are
+// divergent can help the compiler to selectively run these optimizations.
//
-// This file defines divergence analysis which computes a conservative but
-// non-trivial approximation of all divergent branches in a GPU program. It
-// partially implements the approach described in
+// This implementation is derived from the Vectorization Analysis of the
+// Region Vectorizer (RV). That implementation in turn is based on the approach
+// described in
//
-// Divergence Analysis
-// Sampaio, Souza, Collange, Pereira
-// TOPLAS '13
+// Improving Performance of OpenCL on CPUs
+// Ralf Karrenberg and Sebastian Hack
+// CC '12
//
-// The divergence analysis identifies the sources of divergence (e.g., special
-// variables that hold the thread ID), and recursively marks variables that are
-// data or sync dependent on a source of divergence as divergent.
+// This DivergenceAnalysis implementation is generic in the sense that it does
+// not itself identify original sources of divergence.
+// Instead, specialized adapter classes, LoopDivergenceAnalysis for loops and
+// GPUDivergenceAnalysis for GPU programs, identify the sources of divergence
+// (e.g., special variables that hold the thread ID or the iteration variable).
+//
+// The generic implementation propagates divergence to variables that are data
+// or sync dependent on a source of divergence.
//
// While data dependency is a well-known concept, the notion of sync dependency
// is worth more explanation. Sync dependence characterizes the control flow
@@ -54,287 +59,399 @@
// because the branch "br i1 %cond" depends on %tid and affects which value %a
// is assigned to.
//
-// The current implementation has the following limitations:
+// The sync dependence detection (which branch induces divergence in which join
+// points) is implemented in the SyncDependenceAnalysis.
+//
+// The current DivergenceAnalysis implementation has the following limitations:
// 1. intra-procedural. It conservatively considers the arguments of a
// non-kernel-entry function and the return value of a function call as
// divergent.
// 2. memory as black box. It conservatively considers values loaded from
// generic or local address as divergent. This can be improved by leveraging
-// pointer analysis.
+// pointer analysis and/or by modelling non-escaping memory objects in SSA
+// as done in RV.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
+
using namespace llvm;
-#define DEBUG_TYPE "divergence"
-
-namespace {
-
-class DivergencePropagator {
-public:
- DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- PostDominatorTree &PDT, DenseSet<const Value *> &DV)
- : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
- void populateWithSourcesOfDivergence();
- void propagate();
-
-private:
- // A helper function that explores data dependents of V.
- void exploreDataDependency(Value *V);
- // A helper function that explores sync dependents of TI.
- void exploreSyncDependency(TerminatorInst *TI);
- // Computes the influence region from Start to End. This region includes all
- // basic blocks on any simple path from Start to End.
- void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion);
- // Finds all users of I that are outside the influence region, and add these
- // users to Worklist.
- void findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
-
- Function &F;
- TargetTransformInfo &TTI;
- DominatorTree &DT;
- PostDominatorTree &PDT;
- std::vector<Value *> Worklist; // Stack for DFS.
- DenseSet<const Value *> &DV; // Stores all divergent values.
-};
-
-void DivergencePropagator::populateWithSourcesOfDivergence() {
- Worklist.clear();
- DV.clear();
- for (auto &I : instructions(F)) {
- if (TTI.isSourceOfDivergence(&I)) {
- Worklist.push_back(&I);
- DV.insert(&I);
- }
+#define DEBUG_TYPE "divergence-analysis"
+
+// class DivergenceAnalysis
+DivergenceAnalysis::DivergenceAnalysis(
+ const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
+ const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
+ : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
+ IsLCSSAForm(IsLCSSAForm) {}
+
+void DivergenceAnalysis::markDivergent(const Value &DivVal) {
+ assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
+ assert(!isAlwaysUniform(DivVal) && "cannot be divergent");
+ DivergentValues.insert(&DivVal);
+}
+
+void DivergenceAnalysis::addUniformOverride(const Value &UniVal) {
+ UniformOverrides.insert(&UniVal);
+}
+
+bool DivergenceAnalysis::updateTerminator(const Instruction &Term) const {
+ if (Term.getNumSuccessors() <= 1)
+ return false;
+ if (auto *BranchTerm = dyn_cast<BranchInst>(&Term)) {
+ assert(BranchTerm->isConditional());
+ return isDivergent(*BranchTerm->getCondition());
}
- for (auto &Arg : F.args()) {
- if (TTI.isSourceOfDivergence(&Arg)) {
- Worklist.push_back(&Arg);
- DV.insert(&Arg);
- }
+ if (auto *SwitchTerm = dyn_cast<SwitchInst>(&Term)) {
+ return isDivergent(*SwitchTerm->getCondition());
+ }
+ if (isa<InvokeInst>(Term)) {
+ return false; // ignore abnormal executions through landingpad
}
+
+ llvm_unreachable("unexpected terminator");
}
-void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
- // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
- // immediate post dominator are divergent. This rule handles if-then-else
- // patterns. For example,
- //
- // if (tid < 5)
- // a1 = 1;
- // else
- // a2 = 2;
- // a = phi(a1, a2); // sync dependent on (tid < 5)
- BasicBlock *ThisBB = TI->getParent();
-
- // Unreachable blocks may not be in the dominator tree.
- if (!DT.isReachableFromEntry(ThisBB))
- return;
+bool DivergenceAnalysis::updateNormalInstruction(const Instruction &I) const {
+ // TODO function calls with side effects, etc
+ for (const auto &Op : I.operands()) {
+ if (isDivergent(*Op))
+ return true;
+ }
+ return false;
+}
- // If the function has no exit blocks or doesn't reach any exit blocks, the
- // post dominator may be null.
- DomTreeNode *ThisNode = PDT.getNode(ThisBB);
- if (!ThisNode)
- return;
+bool DivergenceAnalysis::isTemporalDivergent(const BasicBlock &ObservingBlock,
+ const Value &Val) const {
+ const auto *Inst = dyn_cast<const Instruction>(&Val);
+ if (!Inst)
+ return false;
+ // check whether any divergent loop carrying Val terminates before control
+ // proceeds to ObservingBlock
+ for (const auto *Loop = LI.getLoopFor(Inst->getParent());
+ Loop != RegionLoop && !Loop->contains(&ObservingBlock);
+ Loop = Loop->getParentLoop()) {
+ if (DivergentLoops.find(Loop) != DivergentLoops.end())
+ return true;
+ }
- BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
- if (IPostDom == nullptr)
- return;
+ return false;
+}
- for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
- // A PHINode is uniform if it returns the same value no matter which path is
- // taken.
- if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
- Worklist.push_back(&*I);
+bool DivergenceAnalysis::updatePHINode(const PHINode &Phi) const {
+ // joining of divergent disjoint paths in the parent block of Phi
+ if (!Phi.hasConstantOrUndefValue() && isJoinDivergent(*Phi.getParent())) {
+ return true;
}
- // Propagation rule 2: if a value defined in a loop is used outside, the user
- // is sync dependent on the condition of the loop exits that dominate the
- // user. For example,
- //
- // int i = 0;
- // do {
- // i++;
- // if (foo(i)) ... // uniform
- // } while (i < tid);
- // if (bar(i)) ... // divergent
+ // An incoming value could be divergent by itself.
+ // Otherwise, an incoming value could be uniform within the loop
+ // that carries its definition but it may appear divergent
+ // from outside the loop. This happens when divergent loop exits
+ // drop definitions of that uniform value in different iterations.
//
- // A program may contain unstructured loops. Therefore, we cannot leverage
- // LoopInfo, which only recognizes natural loops.
- //
- // The algorithm used here handles both natural and unstructured loops. Given
- // a branch TI, we first compute its influence region, the union of all simple
- // paths from TI to its immediate post dominator (IPostDom). Then, we search
- // for all the values defined in the influence region but used outside. All
- // these users are sync dependent on TI.
- DenseSet<BasicBlock *> InfluenceRegion;
- computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
- // An insight that can speed up the search process is that all the in-region
- // values that are used outside must dominate TI. Therefore, instead of
- // searching every basic blocks in the influence region, we search all the
- // dominators of TI until it is outside the influence region.
- BasicBlock *InfluencedBB = ThisBB;
- while (InfluenceRegion.count(InfluencedBB)) {
- for (auto &I : *InfluencedBB)
- findUsersOutsideInfluenceRegion(I, InfluenceRegion);
- DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
- if (IDomNode == nullptr)
- break;
- InfluencedBB = IDomNode->getBlock();
+ // for (int i = 0; i < n; ++i) { // 'i' is uniform inside the loop
+ // if (i % thread_id == 0) break; // divergent loop exit
+ // }
+ // int divI = i; // divI is divergent
+ for (size_t i = 0; i < Phi.getNumIncomingValues(); ++i) {
+ const auto *InVal = Phi.getIncomingValue(i);
+ if (isDivergent(*Phi.getIncomingValue(i)) ||
+ isTemporalDivergent(*Phi.getParent(), *InVal)) {
+ return true;
+ }
}
+ return false;
}
-void DivergencePropagator::findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
- for (User *U : I.users()) {
- Instruction *UserInst = cast<Instruction>(U);
- if (!InfluenceRegion.count(UserInst->getParent())) {
- if (DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
+bool DivergenceAnalysis::inRegion(const Instruction &I) const {
+ return I.getParent() && inRegion(*I.getParent());
+}
+
+bool DivergenceAnalysis::inRegion(const BasicBlock &BB) const {
+ return (!RegionLoop && BB.getParent() == &F) || RegionLoop->contains(&BB);
+}
+
+// marks all users of loop-carried values of the loop headed by LoopHeader as
+// divergent
+void DivergenceAnalysis::taintLoopLiveOuts(const BasicBlock &LoopHeader) {
+ auto *DivLoop = LI.getLoopFor(&LoopHeader);
+ assert(DivLoop && "loopHeader is not actually part of a loop");
+
+ SmallVector<BasicBlock *, 8> TaintStack;
+ DivLoop->getExitBlocks(TaintStack);
+
+ // Otherwise potential users of loop-carried values could be anywhere in the
+ // dominance region of DivLoop (including its fringes for phi nodes)
+ DenseSet<const BasicBlock *> Visited;
+ for (auto *Block : TaintStack) {
+ Visited.insert(Block);
+ }
+ Visited.insert(&LoopHeader);
+
+ while (!TaintStack.empty()) {
+ auto *UserBlock = TaintStack.back();
+ TaintStack.pop_back();
+
+ // don't spread divergence beyond the region
+ if (!inRegion(*UserBlock))
+ continue;
+
+ assert(!DivLoop->contains(UserBlock) &&
+ "irreducible control flow detected");
+
+ // phi nodes at the fringes of the dominance region
+ if (!DT.dominates(&LoopHeader, UserBlock)) {
+ // all PHI nodes of UserBlock become divergent
+ for (auto &Phi : UserBlock->phis()) {
+ Worklist.push_back(&Phi);
+ }
+ continue;
+ }
+
+ // taint outside users of values carried by DivLoop
+ for (auto &I : *UserBlock) {
+ if (isAlwaysUniform(I))
+ continue;
+ if (isDivergent(I))
+ continue;
+
+ for (auto &Op : I.operands()) {
+ auto *OpInst = dyn_cast<Instruction>(&Op);
+ if (!OpInst)
+ continue;
+ if (DivLoop->contains(OpInst->getParent())) {
+ markDivergent(I);
+ pushUsers(I);
+ break;
+ }
+ }
+ }
+
+ // visit all blocks in the dominance region
+ for (auto *SuccBlock : successors(UserBlock)) {
+ if (!Visited.insert(SuccBlock).second) {
+ continue;
+ }
+ TaintStack.push_back(SuccBlock);
}
}
}
-// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
-// to the influence region.
-static void
-addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion,
- std::vector<BasicBlock *> &InfluenceStack) {
- for (BasicBlock *Succ : successors(ThisBB)) {
- if (Succ != End && InfluenceRegion.insert(Succ).second)
- InfluenceStack.push_back(Succ);
+void DivergenceAnalysis::pushPHINodes(const BasicBlock &Block) {
+ for (const auto &Phi : Block.phis()) {
+ if (isDivergent(Phi))
+ continue;
+ Worklist.push_back(&Phi);
}
}
-void DivergencePropagator::computeInfluenceRegion(
- BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion) {
- assert(PDT.properlyDominates(End, Start) &&
- "End does not properly dominate Start");
-
- // The influence region starts from the end of "Start" to the beginning of
- // "End". Therefore, "Start" should not be in the region unless "Start" is in
- // a loop that doesn't contain "End".
- std::vector<BasicBlock *> InfluenceStack;
- addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
- while (!InfluenceStack.empty()) {
- BasicBlock *BB = InfluenceStack.back();
- InfluenceStack.pop_back();
- addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
+void DivergenceAnalysis::pushUsers(const Value &V) {
+ for (const auto *User : V.users()) {
+ const auto *UserInst = dyn_cast<const Instruction>(User);
+ if (!UserInst)
+ continue;
+
+ if (isDivergent(*UserInst))
+ continue;
+
+ // only propagate divergence within the analysis region
+ if (!inRegion(*UserInst))
+ continue;
+ Worklist.push_back(UserInst);
}
}
-void DivergencePropagator::exploreDataDependency(Value *V) {
- // Follow def-use chains of V.
- for (User *U : V->users()) {
- Instruction *UserInst = cast<Instruction>(U);
- if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
+bool DivergenceAnalysis::propagateJoinDivergence(const BasicBlock &JoinBlock,
+ const Loop *BranchLoop) {
+ LLVM_DEBUG(dbgs() << "\tpropJoinDiv " << JoinBlock.getName() << "\n");
+
+ // ignore divergence outside the region
+ if (!inRegion(JoinBlock)) {
+ return false;
+ }
+
+ // push non-divergent phi nodes in JoinBlock to the worklist
+ pushPHINodes(JoinBlock);
+
+ // JoinBlock is a divergent loop exit
+ if (BranchLoop && !BranchLoop->contains(&JoinBlock)) {
+ return true;
}
+
+ // disjoint-paths divergent at JoinBlock
+ markBlockJoinDivergent(JoinBlock);
+ return false;
}
-void DivergencePropagator::propagate() {
- // Traverse the dependency graph using DFS.
- while (!Worklist.empty()) {
- Value *V = Worklist.back();
- Worklist.pop_back();
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
- // Terminators with less than two successors won't introduce sync
- // dependency. Ignore them.
- if (TI->getNumSuccessors() > 1)
- exploreSyncDependency(TI);
+void DivergenceAnalysis::propagateBranchDivergence(const Instruction &Term) {
+ LLVM_DEBUG(dbgs() << "propBranchDiv " << Term.getParent()->getName() << "\n");
+
+ markDivergent(Term);
+
+ const auto *BranchLoop = LI.getLoopFor(Term.getParent());
+
+ // whether there is a divergent loop exit from BranchLoop (if any)
+ bool IsBranchLoopDivergent = false;
+
+ // iterate over all blocks reachable by disjoint paths from Term within the
+ // loop; this also covers loop exits that become divergent due to Term.
+ for (const auto *JoinBlock : SDA.join_blocks(Term)) {
+ IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
+ }
+
+ // Branch loop is a divergent loop due to the divergent branch in Term
+ if (IsBranchLoopDivergent) {
+ assert(BranchLoop);
+ if (!DivergentLoops.insert(BranchLoop).second) {
+ return;
}
- exploreDataDependency(V);
+ propagateLoopDivergence(*BranchLoop);
}
}
-} /// end namespace anonymous
+void DivergenceAnalysis::propagateLoopDivergence(const Loop &ExitingLoop) {
+ LLVM_DEBUG(dbgs() << "propLoopDiv " << ExitingLoop.getName() << "\n");
+
+ // don't propagate beyond region
+ if (!inRegion(*ExitingLoop.getHeader()))
+ return;
-// Register this pass.
-char DivergenceAnalysis::ID = 0;
-INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
- false, true)
+ const auto *BranchLoop = ExitingLoop.getParentLoop();
-FunctionPass *llvm::createDivergenceAnalysisPass() {
- return new DivergenceAnalysis();
-}
+ // Uses of loop-carried values could occur anywhere
+ // within the dominance region of the definition. All loop-carried
+ // definitions are dominated by the loop header (reducible control).
+ // Thus all users have to be in the dominance region of the loop header,
+ // except PHI nodes that can also live at the fringes of the dom region
+ // (incoming defining value).
+ if (!IsLCSSAForm)
+ taintLoopLiveOuts(*ExitingLoop.getHeader());
+
+ // whether there is a divergent loop exit from BranchLoop (if any)
+ bool IsBranchLoopDivergent = false;
+
+ // iterate over all blocks reachable by disjoint paths from exits of
+ // ExitingLoop; this also covers loop exits (of BranchLoop) that in turn
+ // become divergent.
+ for (const auto *JoinBlock : SDA.join_blocks(ExitingLoop)) {
+ IsBranchLoopDivergent |= propagateJoinDivergence(*JoinBlock, BranchLoop);
+ }
-void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.setPreservesAll();
+ // Branch loop is divergent due to a divergent loop exit in ExitingLoop
+ if (IsBranchLoopDivergent) {
+ assert(BranchLoop);
+ if (!DivergentLoops.insert(BranchLoop).second) {
+ return;
+ }
+ propagateLoopDivergence(*BranchLoop);
+ }
}
-bool DivergenceAnalysis::runOnFunction(Function &F) {
- auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
- if (TTIWP == nullptr)
- return false;
+void DivergenceAnalysis::compute() {
+ for (auto *DivVal : DivergentValues) {
+ pushUsers(*DivVal);
+ }
- TargetTransformInfo &TTI = TTIWP->getTTI(F);
- // Fast path: if the target does not have branch divergence, we do not mark
- // any branch as divergent.
- if (!TTI.hasBranchDivergence())
- return false;
+ // propagate divergence
+ while (!Worklist.empty()) {
+ const Instruction &I = *Worklist.back();
+ Worklist.pop_back();
- DivergentValues.clear();
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- DivergencePropagator DP(F, TTI,
- getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- PDT, DivergentValues);
- DP.populateWithSourcesOfDivergence();
- DP.propagate();
- LLVM_DEBUG(
- dbgs() << "\nAfter divergence analysis on " << F.getName() << ":\n";
- print(dbgs(), F.getParent())
- );
- return false;
+ // maintain uniformity of overrides
+ if (isAlwaysUniform(I))
+ continue;
+
+ bool WasDivergent = isDivergent(I);
+ if (WasDivergent)
+ continue;
+
+ // propagate divergence caused by terminator
+ if (I.isTerminator()) {
+ if (updateTerminator(I)) {
+ // propagate control divergence to affected instructions
+ propagateBranchDivergence(I);
+ continue;
+ }
+ }
+
+ // update divergence of I due to divergent operands
+ bool DivergentUpd = false;
+ const auto *Phi = dyn_cast<const PHINode>(&I);
+ if (Phi) {
+ DivergentUpd = updatePHINode(*Phi);
+ } else {
+ DivergentUpd = updateNormalInstruction(I);
+ }
+
+ // propagate value divergence to users
+ if (DivergentUpd) {
+ markDivergent(I);
+ pushUsers(I);
+ }
+ }
+}
+
+bool DivergenceAnalysis::isAlwaysUniform(const Value &V) const {
+ return UniformOverrides.find(&V) != UniformOverrides.end();
+}
+
+bool DivergenceAnalysis::isDivergent(const Value &V) const {
+ return DivergentValues.find(&V) != DivergentValues.end();
}
void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
if (DivergentValues.empty())
return;
- const Value *FirstDivergentValue = *DivergentValues.begin();
- const Function *F;
- if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
- F = Arg->getParent();
- } else if (const Instruction *I =
- dyn_cast<Instruction>(FirstDivergentValue)) {
- F = I->getParent()->getParent();
- } else {
- llvm_unreachable("Only arguments and instructions can be divergent");
+ // iterate instructions using instructions() to ensure a deterministic order.
+ for (auto &I : instructions(F)) {
+ if (isDivergent(I))
+ OS << "DIVERGENT:" << I << '\n';
}
+}
- // Dumps all divergent values in F, arguments and then instructions.
- for (auto &Arg : F->args()) {
- OS << (DivergentValues.count(&Arg) ? "DIVERGENT: " : " ");
- OS << Arg << "\n";
+// class GPUDivergenceAnalysis
+GPUDivergenceAnalysis::GPUDivergenceAnalysis(Function &F,
+ const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ const LoopInfo &LI,
+ const TargetTransformInfo &TTI)
+ : SDA(DT, PDT, LI), DA(F, nullptr, DT, LI, SDA, false) {
+ for (auto &I : instructions(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ DA.markDivergent(I);
+ } else if (TTI.isAlwaysUniform(&I)) {
+ DA.addUniformOverride(I);
+ }
}
- // Iterate instructions using instructions() to ensure a deterministic order.
- for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
- auto &BB = *BI;
- OS << "\n " << BB.getName() << ":\n";
- for (auto &I : BB.instructionsWithoutDebug()) {
- OS << (DivergentValues.count(&I) ? "DIVERGENT: " : " ");
- OS << I << "\n";
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ DA.markDivergent(Arg);
}
}
- OS << "\n";
+
+ DA.compute();
+}
+
+bool GPUDivergenceAnalysis::isDivergent(const Value &val) const {
+ return DA.isDivergent(val);
+}
+
+void GPUDivergenceAnalysis::print(raw_ostream &OS, const Module *mod) const {
+ OS << "Divergence of kernel " << DA.getFunction().getName() << " {\n";
+ DA.print(OS, mod);
+ OS << "}\n";
}
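For orientation, here is a sketch of how a caller might drive the rewritten analysis through the GPUDivergenceAnalysis front end declared in this patch. How the TargetTransformInfo is obtained is assumed (e.g., from a wrapper pass); only the constructor and isDivergent calls mirror the code above:

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Prints the divergent instructions of F. TTI must describe a target with
// branch divergence for the result to be meaningful.
static void printDivergence(Function &F, const TargetTransformInfo &TTI) {
  DominatorTree DT(F);
  PostDominatorTree PDT(F);
  LoopInfo LI(DT);
  GPUDivergenceAnalysis DA(F, DT, PDT, LI, TTI); // seeds sources, runs compute()
  for (Instruction &I : instructions(F))
    if (DA.isDivergent(I))
      errs() << "DIVERGENT: " << I << "\n";
}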
diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp
index 2d35a3fa9118..0df73aeebbdc 100644
--- a/lib/Analysis/EHPersonalities.cpp
+++ b/lib/Analysis/EHPersonalities.cpp
@@ -120,7 +120,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
<< "\'.\n");
BasicBlock *SuccColor = Color;
- TerminatorInst *Terminator = Visiting->getTerminator();
+ Instruction *Terminator = Visiting->getTerminator();
if (auto *CatchRet = dyn_cast<CatchReturnInst>(Terminator)) {
Value *ParentPad = CatchRet->getCatchSwitchParentPad();
if (isa<ConstantTokenNone>(ParentPad))
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index 2c503609d96b..b28abcadca4a 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -255,11 +255,11 @@ FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
}
FunctionModRefBehavior
-GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
+GlobalsAAResult::getModRefBehavior(const CallBase *Call) {
FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
- if (!CS.hasOperandBundles())
- if (const Function *F = CS.getCalledFunction())
+ if (!Call->hasOperandBundles())
+ if (const Function *F = Call->getCalledFunction())
if (FunctionInfo *FI = getFunctionInfo(F)) {
if (!isModOrRefSet(FI->getModRefInfo()))
Min = FMRB_DoesNotAccessMemory;
@@ -267,7 +267,7 @@ GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
Min = FMRB_OnlyReadsMemory;
}
- return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(Call) & Min);
}
/// Returns the function info for the function, or null if we don't have
@@ -366,14 +366,14 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else if (Operator::getOpcode(I) == Instruction::BitCast) {
if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
return true;
- } else if (auto CS = CallSite(I)) {
+ } else if (auto *Call = dyn_cast<CallBase>(I)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
- if (CS.isDataOperand(&U)) {
+ if (Call->isDataOperand(&U)) {
// Detect calls to free.
- if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
+ if (Call->isArgOperand(&U) && isFreeCall(I, &TLI)) {
if (Writers)
- Writers->insert(CS->getParent()->getParent());
+ Writers->insert(Call->getParent()->getParent());
} else {
return true; // Argument of an unknown call.
}
@@ -576,15 +576,15 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// We handle calls specially because the graph-relevant aspects are
// handled above.
- if (auto CS = CallSite(&I)) {
- if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
+ if (isAllocationFn(Call, &TLI) || isFreeCall(Call, &TLI)) {
// FIXME: It is completely unclear why this is necessary and not
// handled by the above graph code.
FI.addModRefInfo(ModRefInfo::ModRef);
- } else if (Function *Callee = CS.getCalledFunction()) {
+ } else if (Function *Callee = Call->getCalledFunction()) {
// The callgraph doesn't include intrinsic calls.
if (Callee->isIntrinsic()) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (isa<DbgInfoIntrinsic>(Call))
// Don't let dbg intrinsics affect alias info.
continue;
@@ -885,16 +885,16 @@ AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
return AAResultBase::alias(LocA, LocB);
}
-ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
const GlobalValue *GV) {
- if (CS.doesNotAccessMemory())
+ if (Call->doesNotAccessMemory())
return ModRefInfo::NoModRef;
ModRefInfo ConservativeResult =
- CS.onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef;
+ Call->onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef;
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
- for (auto &A : CS.args()) {
+ for (auto &A : Call->args()) {
SmallVector<Value*, 4> Objects;
GetUnderlyingObjects(A, Objects, DL);
@@ -914,7 +914,7 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
return ModRefInfo::NoModRef;
}
-ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
ModRefInfo Known = ModRefInfo::ModRef;
@@ -923,15 +923,15 @@ ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
if (const GlobalValue *GV =
dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
if (GV->hasLocalLinkage())
- if (const Function *F = CS.getCalledFunction())
+ if (const Function *F = Call->getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
if (const FunctionInfo *FI = getFunctionInfo(F))
Known = unionModRef(FI->getModRefInfoForGlobal(*GV),
- getModRefInfoForArgument(CS, GV));
+ getModRefInfoForArgument(Call, GV));
if (!isModOrRefSet(Known))
return ModRefInfo::NoModRef; // No need to query other mod/ref analyses
- return intersectModRef(Known, AAResultBase::getModRefInfo(CS, Loc));
+ return intersectModRef(Known, AAResultBase::getModRefInfo(Call, Loc));
}
GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
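The CallSite-to-CallBase migration above changes the query interface that clients of GlobalsAA (via AAResults) use. A minimal sketch of the post-migration call shape, assuming an AAResults instance and a CallBase/MemoryLocation are already in hand:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Returns true when the alias analyses can prove the call neither reads nor
// writes the given location.
static bool callDoesNotTouch(AAResults &AA, const CallBase *Call,
                             const MemoryLocation &Loc) {
  return !isModOrRefSet(AA.getModRefInfo(Call, Loc));
}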
diff --git a/lib/Analysis/GuardUtils.cpp b/lib/Analysis/GuardUtils.cpp
new file mode 100644
index 000000000000..08fa6abeafb5
--- /dev/null
+++ b/lib/Analysis/GuardUtils.cpp
@@ -0,0 +1,21 @@
+//===-- GuardUtils.cpp - Utils for work with guards -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Utils that are used to perform analyses related to guards and their
+// conditions.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/IR/PatternMatch.h"
+
+using namespace llvm;
+
+bool llvm::isGuard(const User *U) {
+ using namespace llvm::PatternMatch;
+ return match(U, m_Intrinsic<Intrinsic::experimental_guard>());
+}
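A usage sketch for the new helper (the caller is hypothetical, not part of the patch):

#include "llvm/Analysis/GuardUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
using namespace llvm;

// Counts calls to llvm.experimental.guard in F using the new predicate.
static unsigned countGuards(Function &F) {
  unsigned N = 0;
  for (Instruction &I : instructions(F))
    if (isGuard(&I))
      ++N;
  return N;
}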
diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp
new file mode 100644
index 000000000000..aaebc4a481ec
--- /dev/null
+++ b/lib/Analysis/IVDescriptors.cpp
@@ -0,0 +1,1089 @@
+//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file "describes" induction and recurrence variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MustExecute.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "iv-descriptors"
+
+bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
+ SmallPtrSetImpl<Instruction *> &Set) {
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
+ if (!Set.count(dyn_cast<Instruction>(*Use)))
+ return false;
+ return true;
+}
+
+bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_IntegerOr:
+ case RK_IntegerAnd:
+ case RK_IntegerXor:
+ case RK_IntegerMinMax:
+ return true;
+ }
+ return false;
+}
+
+bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
+ return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
+}
+
+bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
+ switch (Kind) {
+ default:
+ break;
+ case RK_IntegerAdd:
+ case RK_IntegerMult:
+ case RK_FloatAdd:
+ case RK_FloatMult:
+ return true;
+ }
+ return false;
+}
+
+/// Determines if Phi may have been type-promoted. If Phi has a single user
+/// that ANDs the Phi with a type mask, return the user. RT is updated to
+/// account for the narrower bit width represented by the mask, and the AND
+/// instruction is added to CI.
+static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
+ if (!Phi->hasOneUse())
+ return Phi;
+
+ const APInt *M = nullptr;
+ Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
+
+ // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
+ // with a new integer type of the corresponding bit width.
+ if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
+ int32_t Bits = (*M + 1).exactLogBase2();
+ if (Bits > 0) {
+ RT = IntegerType::get(Phi->getContext(), Bits);
+ Visited.insert(Phi);
+ CI.insert(J);
+ return J;
+ }
+ }
+ return Phi;
+}
+
+/// Compute the minimal bit width needed to represent a reduction whose exit
+/// instruction is given by Exit.
+static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
+ DemandedBits *DB,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ bool IsSigned = false;
+ const DataLayout &DL = Exit->getModule()->getDataLayout();
+ uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
+
+ if (DB) {
+ // Use the demanded bits analysis to determine the bits that are live out
+ // of the exit instruction, rounding up to the nearest power of two. If the
+ // use of demanded bits results in a smaller bit width, we know the value
+ // must be positive (i.e., IsSigned = false), because if this were not the
+ // case, the sign bit would have been demanded.
+ auto Mask = DB->getDemandedBits(Exit);
+ MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
+ }
+
+ if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
+ // If demanded bits wasn't able to limit the bit width, we can try to use
+ // value tracking instead. This can be the case, for example, if the value
+ // may be negative.
+ auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
+ auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
+ MaxBitWidth = NumTypeBits - NumSignBits;
+ KnownBits Bits = computeKnownBits(Exit, DL);
+ if (!Bits.isNonNegative()) {
+ // If the value is not known to be non-negative, we set IsSigned to true,
+ // meaning that we will use sext instructions instead of zext
+ // instructions to restore the original type.
+ IsSigned = true;
+ if (!Bits.isNegative())
+ // If the value is not known to be negative, we don't know what the
+ // upper bit is, and therefore, we don't know what kind of extend we
+ // will need. In this case, just increase the bit width by one bit and
+ // use sext.
+ ++MaxBitWidth;
+ }
+ }
+ if (!isPowerOf2_64(MaxBitWidth))
+ MaxBitWidth = NextPowerOf2(MaxBitWidth);
+
+ return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
+ IsSigned);
+}
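To make the width computation above concrete, here is the demanded-bits branch evaluated by hand on made-up numbers (an illustration, not code from the patch):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
using llvm::APInt;

// Suppose DB->getDemandedBits(Exit) returned 0xFF for an i32 exit value: only
// the low 8 bits are live out, so the reduction fits in i8 and can later be
// restored with zext (IsSigned stays false because the sign bit was never
// demanded).
void recurrenceWidthExample() {
  APInt Mask(32, 0xFF);
  uint64_t MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); // 8
  if (!llvm::isPowerOf2_64(MaxBitWidth))
    MaxBitWidth = llvm::NextPowerOf2(MaxBitWidth);
  assert(MaxBitWidth == 8);
}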
+
+/// Collect cast instructions that can be ignored in the vectorizer's cost
+/// model, given a reduction exit value and the minimal type in which the
+/// reduction can be represented.
+static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
+ Type *RecurrenceType,
+ SmallPtrSetImpl<Instruction *> &Casts) {
+
+ SmallVector<Instruction *, 8> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
+ Worklist.push_back(Exit);
+
+ while (!Worklist.empty()) {
+ Instruction *Val = Worklist.pop_back_val();
+ Visited.insert(Val);
+ if (auto *Cast = dyn_cast<CastInst>(Val))
+ if (Cast->getSrcTy() == RecurrenceType) {
+ // If the source type of a cast instruction is equal to the recurrence
+ // type, it will be eliminated, and should be ignored in the vectorizer
+ // cost model.
+ Casts.insert(Cast);
+ continue;
+ }
+
+ // Add all operands to the work list if they are loop-varying values that
+ // we haven't yet visited.
+ for (Value *O : cast<User>(Val)->operands())
+ if (auto *I = dyn_cast<Instruction>(O))
+ if (TheLoop->contains(I) && !Visited.count(I))
+ Worklist.push_back(I);
+ }
+}
+
+bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
+ Loop *TheLoop, bool HasFunNoNaNAttr,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Reduction variables are only found in the loop header block.
+ if (Phi->getParent() != TheLoop->getHeader())
+ return false;
+
+ // Obtain the reduction start value from the value that comes from the loop
+ // preheader.
+ Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
+
+ // ExitInstruction is the single value which is used outside the loop.
+ // We only allow for a single reduction value to be used outside the loop.
+ // This includes users of the reduction and the variables that form the
+ // cycle ending in the phi node.
+ Instruction *ExitInstruction = nullptr;
+ // Indicates that we found a reduction operation in our scan.
+ bool FoundReduxOp = false;
+
+ // We start with the PHI node and scan for all of the users of this
+ // instruction. All users must be instructions that can be used as reduction
+ // variables (such as ADD). We must have a single out-of-block user. The cycle
+ // must include the original PHI.
+ bool FoundStartPHI = false;
+
+ // To recognize min/max patterns formed by an icmp/select sequence, we store
+ // the number of instructions we saw from the recognized min/max pattern,
+ // to make sure we only see exactly the two instructions.
+ unsigned NumCmpSelectPatternInst = 0;
+ InstDesc ReduxDesc(false, nullptr);
+
+ // Data used for determining if the recurrence has been type-promoted.
+ Type *RecurrenceType = Phi->getType();
+ SmallPtrSet<Instruction *, 4> CastInsts;
+ Instruction *Start = Phi;
+ bool IsSigned = false;
+
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ SmallVector<Instruction *, 8> Worklist;
+
+ // Return early if the recurrence kind does not match the type of Phi. If the
+ // recurrence kind is arithmetic, we attempt to look through AND operations
+ // resulting from the type promotion performed by InstCombine. Vector
+ // operations are not limited to the legal integer widths, so we may be able
+ // to evaluate the reduction in the narrower width.
+ if (RecurrenceType->isFloatingPointTy()) {
+ if (!isFloatingPointRecurrenceKind(Kind))
+ return false;
+ } else {
+ if (!isIntegerRecurrenceKind(Kind))
+ return false;
+ if (isArithmeticRecurrenceKind(Kind))
+ Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
+ }
+
+ Worklist.push_back(Start);
+ VisitedInsts.insert(Start);
+
+ // A value in the reduction can be used:
+ // - By the reduction:
+ // - Reduction operation:
+ // - One use of reduction value (safe).
+ // - Multiple use of reduction value (not safe).
+ // - PHI:
+ // - All uses of the PHI must be the reduction (safe).
+ // - Otherwise, not safe.
+ // - By instructions outside of the loop (safe).
+ // * One value may have several outside users, but all outside
+ // uses must be of the same value.
+ // - By an instruction that is not part of the reduction (not safe).
+ // This is either:
+ // * An instruction type other than PHI or the reduction operation.
+ // * A PHI in the header other than the initial PHI.
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ // No Users.
+ // If the instruction has no users then this is a broken chain and can't be
+ // a reduction variable.
+ if (Cur->use_empty())
+ return false;
+
+ bool IsAPhi = isa<PHINode>(Cur);
+
+ // A header PHI use other than the original PHI.
+ if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
+ return false;
+
+ // Reductions of instructions such as Div and Sub are only possible if the
+ // LHS is the reduction variable.
+ if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
+ !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ // Any reduction instruction must be of one of the allowed kinds. We ignore
+ // the starting value (the Phi or an AND instruction if the Phi has been
+ // type-promoted).
+ if (Cur != Start) {
+ ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
+ if (!ReduxDesc.isRecurrence())
+ return false;
+ }
+
+ bool IsASelect = isa<SelectInst>(Cur);
+
+ // A conditional reduction operation must only have two or fewer uses in
+ // VisitedInsts.
+ if (IsASelect && (Kind == RK_FloatAdd || Kind == RK_FloatMult) &&
+ hasMultipleUsesOf(Cur, VisitedInsts, 2))
+ return false;
+
+ // A reduction operation must only have one use of the reduction value.
+ if (!IsAPhi && !IsASelect && Kind != RK_IntegerMinMax &&
+ Kind != RK_FloatMinMax && hasMultipleUsesOf(Cur, VisitedInsts, 1))
+ return false;
+
+ // All inputs to a PHI node must be a reduction value.
+ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
+ return false;
+
+ if (Kind == RK_IntegerMinMax &&
+ (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+ if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
+ ++NumCmpSelectPatternInst;
+
+ // Check whether we found a reduction operator.
+ FoundReduxOp |= !IsAPhi && Cur != Start;
+
+ // Process users of current instruction. Push non-PHI nodes after PHI nodes
+ // onto the stack. This way we are going to have seen all inputs to PHI
+ // nodes once we get to them.
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (User *U : Cur->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ // Check if we found the exit user.
+ BasicBlock *Parent = UI->getParent();
+ if (!TheLoop->contains(Parent)) {
+ // If we already know this instruction is used externally, move on to
+ // the next user.
+ if (ExitInstruction == Cur)
+ continue;
+
+ // Exit if you find multiple values used outside or if the header phi
+ // node is being used. In this case the user uses the value of the
+ // previous iteration, in which case we would lose "VF-1" iterations of
+ // the reduction operation if we vectorize.
+ if (ExitInstruction != nullptr || Cur == Phi)
+ return false;
+
+ // The instruction used by an outside user must be the last instruction
+ // before we feed back to the reduction phi. Otherwise, we lose VF-1
+ // operations on the value.
+ if (!is_contained(Phi->operands(), Cur))
+ return false;
+
+ ExitInstruction = Cur;
+ continue;
+ }
+
+ // Process instructions only once (termination). Each reduction cycle
+ // value must only be used once, except by phi nodes and min/max
+ // reductions which are represented as a cmp followed by a select.
+ InstDesc IgnoredVal(false, nullptr);
+ if (VisitedInsts.insert(UI).second) {
+ if (isa<PHINode>(UI))
+ PHIs.push_back(UI);
+ else
+ NonPHIs.push_back(UI);
+ } else if (!isa<PHINode>(UI) &&
+ ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
+ !isa<SelectInst>(UI)) ||
+ (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
+ !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())))
+ return false;
+
+ // Remember that we completed the cycle.
+ if (UI == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
+
+ // This means we have seen one but not the other instruction of the
+ // pattern or more than just a select and cmp.
+ if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+ NumCmpSelectPatternInst != 2)
+ return false;
+
+ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
+ return false;
+
+ if (Start != Phi) {
+ // If the starting value is not the same as the phi node, we speculatively
+ // looked through an 'and' instruction when evaluating a potential
+ // arithmetic reduction to determine if it may have been type-promoted.
+ //
+ // We now compute the minimal bit width that is required to represent the
+ // reduction. If this is the same width that was indicated by the 'and', we
+ // can represent the reduction in the smaller type. The 'and' instruction
+ // will be eliminated since it will essentially be a cast instruction that
+ // can be ignored in the cost model. If we compute a different type than we
+ // did when evaluating the 'and', the 'and' will not be eliminated, and we
+ // will end up with different kinds of operations in the recurrence
+ // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
+ // the case.
+ //
+ // The vectorizer relies on InstCombine to perform the actual
+ // type-shrinking. It does this by inserting instructions to truncate the
+ // exit value of the reduction to the width indicated by RecurrenceType and
+ // then extend this value back to the original width. If IsSigned is false,
+ // a 'zext' instruction will be generated; otherwise, a 'sext' will be
+ // used.
+ //
+ // TODO: We should not rely on InstCombine to rewrite the reduction in the
+ // smaller type. We should just generate a correctly typed expression
+ // to begin with.
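+    // As a hedged illustration only (hypothetical value names, assuming the
+    // original width is i32 and RecurrenceType is i8), the sequence we expect
+    // InstCombine to form looks like:
+    //   %rdx.trunc = trunc i32 %rdx.exit to i8
+    //   %rdx.ext   = zext i8 %rdx.trunc to i32   ; 'sext' when IsSigned is true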
+ Type *ComputedType;
+ std::tie(ComputedType, IsSigned) =
+ computeRecurrenceType(ExitInstruction, DB, AC, DT);
+ if (ComputedType != RecurrenceType)
+ return false;
+
+ // The recurrence expression will be represented in a narrower type. If
+ // there are any cast instructions that will be unnecessary, collect them
+ // in CastInsts. Note that the 'and' instruction was already included in
+ // this list.
+ //
+ // TODO: A better way to represent this may be to tag in some way all the
+ // instructions that are a part of the reduction. The vectorizer cost
+ // model could then apply the recurrence type to these instructions,
+ // without needing a white list of instructions to ignore.
+ collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
+ }
+
+ // We found a reduction var if we have reached the original phi node and we
+ // only have a single instruction with out-of-loop users.
+
+  // The ExitInstruction (the instruction which is allowed to have out-of-loop
+  // users) is saved as part of the RecurrenceDescriptor.
+
+ // Save the description of this reduction variable.
+ RecurrenceDescriptor RD(
+ RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+ ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
+ RedDes = RD;
+
+ return true;
+}
+
+/// Returns a recurrence descriptor (InstDesc) describing whether the
+/// instruction is a Select(ICmp(X, Y), X, Y) pattern corresponding to a
+/// min(X, Y) or max(X, Y).
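+/// For example, a signed-max pattern in IR may look like the following hedged
+/// sketch (value names are illustrative only):
+///   %cmp = icmp sgt i32 %x, %y
+///   %max = select i1 %cmp, i32 %x, i32 %y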
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {
+
+ assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+         "Expected a cmp or select instruction");
+ Instruction *Cmp = nullptr;
+ SelectInst *Select = nullptr;
+
+ // We must handle the select(cmp()) as a single instruction. Advance to the
+ // select.
+ if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+ if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
+ return InstDesc(false, I);
+ return InstDesc(Select, Prev.getMinMaxKind());
+ }
+
+ // Only handle single use cases for now.
+ if (!(Select = dyn_cast<SelectInst>(I)))
+ return InstDesc(false, I);
+ if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+ !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+ return InstDesc(false, I);
+ if (!Cmp->hasOneUse())
+ return InstDesc(false, I);
+
+ Value *CmpLeft;
+ Value *CmpRight;
+
+ // Look for a min/max pattern.
+ if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_UIntMin);
+ else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_UIntMax);
+ else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_SIntMax);
+ else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_SIntMin);
+ else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMin);
+ else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMax);
+ else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMin);
+ else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+ return InstDesc(Select, MRK_FloatMax);
+
+ return InstDesc(false, I);
+}
+
+/// Returns true if the select instruction has users in the compare-and-add
+/// reduction pattern below. The select instruction argument is the last one
+/// in the sequence.
+///
+/// %sum.1 = phi ...
+/// ...
+/// %cmp = fcmp pred %0, %CFP
+/// %add = fadd %0, %sum.1
+/// %sum.2 = select %cmp, %add, %sum.1
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isConditionalRdxPattern(
+ RecurrenceKind Kind, Instruction *I) {
+ SelectInst *SI = dyn_cast<SelectInst>(I);
+ if (!SI)
+ return InstDesc(false, I);
+
+ CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
+ // Only handle single use cases for now.
+ if (!CI || !CI->hasOneUse())
+ return InstDesc(false, I);
+
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+  // For now, handle only the case where exactly one of the select's operands
+  // is a PHI node.
+ if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
+ (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
+ return InstDesc(false, I);
+
+ Instruction *I1 =
+ isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
+ : dyn_cast<Instruction>(TrueVal);
+ if (!I1 || !I1->isBinaryOp())
+ return InstDesc(false, I);
+
+ Value *Op1, *Op2;
+ if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
+ m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
+ I1->isFast())
+ return InstDesc(Kind == RK_FloatAdd, SI);
+
+ if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
+ return InstDesc(Kind == RK_FloatMult, SI);
+
+ return InstDesc(false, I);
+}
+
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
+ InstDesc &Prev, bool HasFunNoNaNAttr) {
+ bool FP = I->getType()->isFloatingPointTy();
+ Instruction *UAI = Prev.getUnsafeAlgebraInst();
+ if (!UAI && FP && !I->isFast())
+ UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
+
+ switch (I->getOpcode()) {
+ default:
+ return InstDesc(false, I);
+ case Instruction::PHI:
+ return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
+ case Instruction::Sub:
+ case Instruction::Add:
+ return InstDesc(Kind == RK_IntegerAdd, I);
+ case Instruction::Mul:
+ return InstDesc(Kind == RK_IntegerMult, I);
+ case Instruction::And:
+ return InstDesc(Kind == RK_IntegerAnd, I);
+ case Instruction::Or:
+ return InstDesc(Kind == RK_IntegerOr, I);
+ case Instruction::Xor:
+ return InstDesc(Kind == RK_IntegerXor, I);
+ case Instruction::FMul:
+ return InstDesc(Kind == RK_FloatMult, I, UAI);
+ case Instruction::FSub:
+ case Instruction::FAdd:
+ return InstDesc(Kind == RK_FloatAdd, I, UAI);
+ case Instruction::Select:
+ if (Kind == RK_FloatAdd || Kind == RK_FloatMult)
+ return isConditionalRdxPattern(Kind, I);
+ LLVM_FALLTHROUGH;
+ case Instruction::FCmp:
+ case Instruction::ICmp:
+ if (Kind != RK_IntegerMinMax &&
+ (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+ return InstDesc(false, I);
+ return isMinMaxSelectCmpPattern(I, Prev);
+ }
+}
+
+bool RecurrenceDescriptor::hasMultipleUsesOf(
+ Instruction *I, SmallPtrSetImpl<Instruction *> &Insts,
+ unsigned MaxNumUses) {
+ unsigned NumUses = 0;
+ for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
+ ++Use) {
+ if (Insts.count(dyn_cast<Instruction>(*Use)))
+ ++NumUses;
+ if (NumUses > MaxNumUses)
+ return true;
+ }
+
+ return false;
+}
+bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB, AssumptionCache *AC,
+ DominatorTree *DT) {
+
+ BasicBlock *Header = TheLoop->getHeader();
+ Function &F = *Header->getParent();
+ bool HasFunNoNaNAttr =
+ F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
+
+ if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
+ DB, AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+ LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
+    LLVM_DEBUG(dbgs() << "Found a float MINMAX reduction PHI." << *Phi
+ << "\n");
+ return true;
+ }
+ // Not a reduction of known type.
+ return false;
+}
+
+bool RecurrenceDescriptor::isFirstOrderRecurrence(
+ PHINode *Phi, Loop *TheLoop,
+ DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
+
+ // Ensure the phi node is in the loop header and has two incoming values.
+ if (Phi->getParent() != TheLoop->getHeader() ||
+ Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Ensure the loop has a preheader and a single latch block. The loop
+ // vectorizer will need the latch to set up the next iteration of the loop.
+ auto *Preheader = TheLoop->getLoopPreheader();
+ auto *Latch = TheLoop->getLoopLatch();
+ if (!Preheader || !Latch)
+ return false;
+
+ // Ensure the phi node's incoming blocks are the loop preheader and latch.
+ if (Phi->getBasicBlockIndex(Preheader) < 0 ||
+ Phi->getBasicBlockIndex(Latch) < 0)
+ return false;
+
+ // Get the previous value. The previous value comes from the latch edge while
+  // the initial value comes from the preheader edge.
+ auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
+ SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
+ return false;
+
+ // Ensure every user of the phi node is dominated by the previous value.
+ // The dominance requirement ensures the loop vectorizer will not need to
+ // vectorize the initial value prior to the first iteration of the loop.
+ // TODO: Consider extending this sinking to handle other kinds of instructions
+ // and expressions, beyond sinking a single cast past Previous.
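+  // As a hedged illustration (hypothetical IR names), the single-cast case
+  // handled below looks like:
+  //   %for = phi i32 [ %init, %preheader ], [ %prev, %latch ]
+  //   %for.trunc = trunc i32 %for to i16   ; sole user of the phi
+  //   ...
+  //   %prev = ...
+  // If %prev does not dominate %for.trunc, the cast is recorded in SinkAfter
+  // so it can later be sunk past %prev.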
+ if (Phi->hasOneUse()) {
+ auto *I = Phi->user_back();
+ if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
+ DT->dominates(Previous, I->user_back())) {
+ if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
+ SinkAfter[I] = Previous;
+ return true;
+ }
+ }
+
+ for (User *U : Phi->users())
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (!DT->dominates(Previous, I))
+ return false;
+ }
+
+ return true;
+}
+
+/// This function returns the identity element (or neutral element) for
+/// the operation K.
+Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
+ Type *Tp) {
+ switch (K) {
+ case RK_IntegerXor:
+ case RK_IntegerAdd:
+ case RK_IntegerOr:
+ // Adding, Xoring, Oring zero to a number does not change it.
+ return ConstantInt::get(Tp, 0);
+ case RK_IntegerMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantInt::get(Tp, 1);
+ case RK_IntegerAnd:
+ // AND-ing a number with an all-1 value does not change it.
+ return ConstantInt::get(Tp, -1, true);
+ case RK_FloatMult:
+ // Multiplying a number by 1 does not change it.
+ return ConstantFP::get(Tp, 1.0L);
+ case RK_FloatAdd:
+ // Adding zero to a number does not change it.
+ return ConstantFP::get(Tp, 0.0L);
+ default:
+ llvm_unreachable("Unknown recurrence kind");
+ }
+}
+
+/// This function translates the recurrence kind to an LLVM binary operator.
+unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
+ switch (Kind) {
+ case RK_IntegerAdd:
+ return Instruction::Add;
+ case RK_IntegerMult:
+ return Instruction::Mul;
+ case RK_IntegerOr:
+ return Instruction::Or;
+ case RK_IntegerAnd:
+ return Instruction::And;
+ case RK_IntegerXor:
+ return Instruction::Xor;
+ case RK_FloatMult:
+ return Instruction::FMul;
+ case RK_FloatAdd:
+ return Instruction::FAdd;
+ case RK_IntegerMinMax:
+ return Instruction::ICmp;
+ case RK_FloatMinMax:
+ return Instruction::FCmp;
+ default:
+ llvm_unreachable("Unknown recurrence operation");
+ }
+}
+
+InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
+ const SCEV *Step, BinaryOperator *BOp,
+ SmallVectorImpl<Instruction *> *Casts)
+ : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
+ assert(IK != IK_NoInduction && "Not an induction");
+
+ // Start value type should match the induction kind and the value
+ // itself should not be null.
+ assert(StartValue && "StartValue is null");
+ assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
+ "StartValue is not a pointer for pointer induction");
+ assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
+ "StartValue is not an integer for integer induction");
+
+  // Check the Step Value. It should be a non-zero integer value.
+ assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
+ "Step value is zero");
+
+ assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
+ "Step value should be constant for pointer induction");
+ assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
+ "StepValue is not an integer");
+
+ assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
+ "StepValue is not FP for FpInduction");
+ assert((IK != IK_FpInduction ||
+ (InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub))) &&
+ "Binary opcode should be specified for FP induction");
+
+ if (Casts) {
+ for (auto &Inst : *Casts) {
+ RedundantCasts.push_back(Inst);
+ }
+ }
+}
+
+int InductionDescriptor::getConsecutiveDirection() const {
+ ConstantInt *ConstStep = getConstIntStepValue();
+ if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
+ return ConstStep->getSExtValue();
+ return 0;
+}
+
+ConstantInt *InductionDescriptor::getConstIntStepValue() const {
+ if (isa<SCEVConstant>(Step))
+ return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
+ return nullptr;
+}
+
+bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ ScalarEvolution *SE,
+ InductionDescriptor &D) {
+
+ // Here we only handle FP induction variables.
+ assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
+
+ if (TheLoop->getHeader() != Phi->getParent())
+ return false;
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi if it has a unique entry value and a unique backedge value.
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+ Value *BEValue = nullptr, *StartValue = nullptr;
+ if (TheLoop->contains(Phi->getIncomingBlock(0))) {
+ BEValue = Phi->getIncomingValue(0);
+ StartValue = Phi->getIncomingValue(1);
+ } else {
+ assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
+ "Unexpected Phi node in the loop");
+ BEValue = Phi->getIncomingValue(1);
+ StartValue = Phi->getIncomingValue(0);
+ }
+
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
+ if (!BOp)
+ return false;
+
+ Value *Addend = nullptr;
+ if (BOp->getOpcode() == Instruction::FAdd) {
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+ else if (BOp->getOperand(1) == Phi)
+ Addend = BOp->getOperand(0);
+ } else if (BOp->getOpcode() == Instruction::FSub)
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+
+ if (!Addend)
+ return false;
+
+ // The addend should be loop invariant
+ if (auto *I = dyn_cast<Instruction>(Addend))
+ if (TheLoop->contains(I))
+ return false;
+
+ // FP Step has unknown SCEV
+ const SCEV *Step = SE->getUnknown(Addend);
+ D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
+ return true;
+}
+
+/// This function is called when we suspect that the update-chain of a phi node
+/// (whose symbolic SCEV expression is \p PhiScev) contains redundant casts,
+/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
+/// predicate P under which the SCEV expression for the phi can be the
+/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
+/// cast instructions that are involved in the update-chain of this induction.
+/// A caller that adds the required runtime predicate can be free to drop these
+/// cast instructions, and compute the phi using \p AR (instead of some scev
+/// expression with casts).
+///
+/// For example, without a predicate the scev expression can take the following
+/// form:
+/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
+///
+/// It corresponds to the following IR sequence:
+/// %for.body:
+/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
+/// %casted_phi = "ExtTrunc i64 %x"
+/// %add = add i64 %casted_phi, %step
+///
+/// where %x is given in \p PN,
+/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
+/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
+/// several forms, for example:
+/// ExtTrunc1: %casted_phi = and %x, 2^n-1
+/// or:
+/// ExtTrunc2: %t = shl %x, m
+/// %casted_phi = ashr %t, m
+///
+/// If we are able to find such a sequence, we return the instructions
+/// we found, namely %casted_phi and the instructions on its use-def chain up
+/// to the phi (not including the phi).
+static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
+ const SCEVUnknown *PhiScev,
+ const SCEVAddRecExpr *AR,
+ SmallVectorImpl<Instruction *> &CastInsts) {
+
+ assert(CastInsts.empty() && "CastInsts is expected to be empty.");
+ auto *PN = cast<PHINode>(PhiScev->getValue());
+ assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
+ const Loop *L = AR->getLoop();
+
+ // Find any cast instructions that participate in the def-use chain of
+ // PhiScev in the loop.
+ // FORNOW/TODO: We currently expect the def-use chain to include only
+ // two-operand instructions, where one of the operands is an invariant.
+ // createAddRecFromPHIWithCasts() currently does not support anything more
+ // involved than that, so we keep the search simple. This can be
+ // extended/generalized as needed.
+
+ auto getDef = [&](const Value *Val) -> Value * {
+ const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
+ if (!BinOp)
+ return nullptr;
+ Value *Op0 = BinOp->getOperand(0);
+ Value *Op1 = BinOp->getOperand(1);
+ Value *Def = nullptr;
+ if (L->isLoopInvariant(Op0))
+ Def = Op1;
+ else if (L->isLoopInvariant(Op1))
+ Def = Op0;
+ return Def;
+ };
+
+ // Look for the instruction that defines the induction via the
+ // loop backedge.
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch)
+ return false;
+ Value *Val = PN->getIncomingValueForBlock(Latch);
+ if (!Val)
+ return false;
+
+ // Follow the def-use chain until the induction phi is reached.
+ // If on the way we encounter a Value that has the same SCEV Expr as the
+ // phi node, we can consider the instructions we visit from that point
+ // as part of the cast-sequence that can be ignored.
+ bool InCastSequence = false;
+ auto *Inst = dyn_cast<Instruction>(Val);
+ while (Val != PN) {
+ // If we encountered a phi node other than PN, or if we left the loop,
+ // we bail out.
+ if (!Inst || !L->contains(Inst)) {
+ return false;
+ }
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
+ if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
+ InCastSequence = true;
+ if (InCastSequence) {
+ // Only the last instruction in the cast sequence is expected to have
+ // uses outside the induction def-use chain.
+ if (!CastInsts.empty())
+ if (!Inst->hasOneUse())
+ return false;
+ CastInsts.push_back(Inst);
+ }
+ Val = getDef(Val);
+ if (!Val)
+ return false;
+ Inst = dyn_cast<Instruction>(Val);
+ }
+
+ return InCastSequence;
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ PredicatedScalarEvolution &PSE,
+ InductionDescriptor &D, bool Assume) {
+ Type *PhiTy = Phi->getType();
+
+  // Handle integer and pointer induction variables.
+  // We now also handle FP induction, but we do not try to build a
+  // recurrent expression from the PHI node in-place.
+
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
+ !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
+ return false;
+
+ if (PhiTy->isFloatingPointTy())
+ return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
+
+ const SCEV *PhiScev = PSE.getSCEV(Phi);
+ const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ // We need this expression to be an AddRecExpr.
+ if (Assume && !AR)
+ AR = PSE.getAsAddRec(Phi);
+
+ if (!AR) {
+ LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ // Record any Cast instructions that participate in the induction update
+ const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
+ // If we started from an UnknownSCEV, and managed to build an addRecurrence
+ // only after enabling Assume with PSCEV, this means we may have encountered
+ // cast instructions that required adding a runtime check in order to
+  // guarantee the correctness of the AddRecurrence representation of the
+ // induction.
+ if (PhiScev != AR && SymbolicPhi) {
+ SmallVector<Instruction *, 2> Casts;
+ if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
+ }
+
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
+}
+
+bool InductionDescriptor::isInductionPHI(
+ PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
+ InductionDescriptor &D, const SCEV *Expr,
+ SmallVectorImpl<Instruction *> *CastsToIgnore) {
+ Type *PhiTy = Phi->getType();
+  // We only handle integer and pointer induction variables.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+ return false;
+
+ // Check that the PHI is consecutive.
+ const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+
+ if (!AR) {
+ LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+
+ if (AR->getLoop() != TheLoop) {
+ // FIXME: We should treat this as a uniform. Unfortunately, we
+    // don't currently know how to handle uniform PHIs.
+ LLVM_DEBUG(
+ dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
+ return false;
+ }
+
+ Value *StartValue =
+ Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+ // Calculate the pointer stride and check if it is consecutive.
+ // The stride may be a constant or a loop invariant integer value.
+ const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+ if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
+ return false;
+
+ if (PhiTy->isIntegerTy()) {
+ D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/nullptr,
+ CastsToIgnore);
+ return true;
+ }
+
+ assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
+ // Pointer induction should be a constant.
+ if (!ConstStep)
+ return false;
+
+ ConstantInt *CV = ConstStep->getValue();
+ Type *PointerElementType = PhiTy->getPointerElementType();
+ // The pointer stride cannot be determined if the pointer element type is not
+ // sized.
+ if (!PointerElementType->isSized())
+ return false;
+
+ const DataLayout &DL = Phi->getModule()->getDataLayout();
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
+ if (!Size)
+ return false;
+
+ int64_t CVSize = CV->getSExtValue();
+ if (CVSize % Size)
+ return false;
+ auto *StepValue =
+ SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
+ return true;
+}
diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 4659c0a00629..d6e6e76af03c 100644
--- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -15,7 +15,7 @@
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index a6cccc3b5910..6ddb3cbc01a3 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -30,6 +31,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
@@ -137,7 +139,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool HasReturn;
bool HasIndirectBr;
bool HasUninlineableIntrinsic;
- bool UsesVarArgs;
+ bool InitsVargArgs;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -227,7 +229,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
BlockFrequencyInfo *CallerBFI);
// Custom analysis routines.
- bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
+ InlineResult analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues);
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
@@ -282,7 +285,7 @@ public:
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- HasUninlineableIntrinsic(false), UsesVarArgs(false), AllocatedSize(0),
+ HasUninlineableIntrinsic(false), InitsVargArgs(false), AllocatedSize(0),
NumInstructions(0), NumVectorInstructions(0), VectorBonus(0),
SingleBBBonus(0), EnableLoadElimination(true), LoadEliminationCost(0),
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
@@ -290,7 +293,7 @@ public:
NumInstructionsSimplified(0), SROACostSavings(0),
SROACostSavingsLost(0) {}
- bool analyzeCall(CallSite CS);
+ InlineResult analyzeCall(CallSite CS);
int getThreshold() { return Threshold; }
int getCost() { return Cost; }
@@ -719,6 +722,7 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
case Instruction::FPToSI:
if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
Cost += InlineConstants::CallPenalty;
+ break;
default:
break;
}
@@ -1238,8 +1242,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
HasUninlineableIntrinsic = true;
return false;
case Intrinsic::vastart:
- case Intrinsic::vaend:
- UsesVarArgs = true;
+ InitsVargArgs = true;
return false;
}
}
@@ -1541,8 +1544,9 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
-bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
- SmallPtrSetImpl<const Value *> &EphValues) {
+InlineResult
+CallAnalyzer::analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,
@@ -1574,16 +1578,29 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr || HasUninlineableIntrinsic || UsesVarArgs) {
+ InlineResult IR;
+ if (IsRecursiveCall)
+ IR = "recursive";
+ else if (ExposesReturnsTwice)
+ IR = "exposes returns twice";
+ else if (HasDynamicAlloca)
+ IR = "dynamic alloca";
+ else if (HasIndirectBr)
+ IR = "indirect branch";
+ else if (HasUninlineableIntrinsic)
+ IR = "uninlinable intrinsic";
+ else if (InitsVargArgs)
+ IR = "varargs";
+ if (!IR) {
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
- << NV("Callee", &F)
- << " has uninlinable pattern and cost is not fully computed";
+ << NV("Callee", &F) << " has uninlinable pattern ("
+ << NV("InlineResult", IR.message)
+ << ") and cost is not fully computed";
});
- return false;
+ return IR;
}
// If the caller is a recursive function then we don't want to inline
@@ -1591,15 +1608,15 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// the caller stack usage dramatically.
if (IsCallerRecursive &&
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) {
+ InlineResult IR = "recursive and allocates too much stack space";
if (ORE)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline",
CandidateCS.getInstruction())
- << NV("Callee", &F)
- << " is recursive and allocates too much stack space. Cost is "
- "not fully computed";
+ << NV("Callee", &F) << " is " << NV("InlineResult", IR.message)
+ << ". Cost is not fully computed";
});
- return false;
+ return IR;
}
// Check if we've past the maximum possible threshold so we don't spin in
@@ -1695,7 +1712,7 @@ void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) {
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
-bool CallAnalyzer::analyzeCall(CallSite CS) {
+InlineResult CallAnalyzer::analyzeCall(CallSite CS) {
++NumCallsAnalyzed;
// Perform some tweaks to the cost and threshold based on the direct
@@ -1714,6 +1731,13 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Update the threshold based on callsite properties
updateThreshold(CS, F);
+ // While Threshold depends on commandline options that can take negative
+ // values, we want to enforce the invariant that the computed threshold and
+ // bonuses are non-negative.
+ assert(Threshold >= 0);
+ assert(SingleBBBonus >= 0);
+ assert(VectorBonus >= 0);
+
// Speculatively apply all possible bonuses to Threshold. If cost exceeds
// this Threshold any time, and cost cannot decrease, we can stop processing
// the rest of the function body.
@@ -1730,7 +1754,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Check if we're done. This can happen due to bonuses and penalties.
if (Cost >= Threshold && !ComputeFullInlineCost)
- return false;
+ return "high cost";
if (F.empty())
return true;
@@ -1809,14 +1833,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// site. If the blockaddress escapes the function, e.g., via a global
// variable, inlining may lead to an invalid cross-function reference.
if (BB->hasAddressTaken())
- return false;
+ return "blockaddress";
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
- if (!analyzeBlock(BB, EphValues))
- return false;
+ InlineResult IR = analyzeBlock(BB, EphValues);
+ if (!IR)
+ return IR;
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
@@ -1867,7 +1892,25 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
- return false;
+ return "noduplicate";
+
+  // Loops generally act a lot like calls in that they act as barriers to
+  // movement, require a certain amount of setup, etc. So when optimising for
+  // size, we penalise any call sites that contain loops. We do this after all
+  // other costs here, so we will likely only be dealing with relatively small
+  // functions (and hence DT and LI will hopefully be cheap).
+ if (Caller->optForMinSize()) {
+ DominatorTree DT(F);
+ LoopInfo LI(DT);
+ int NumLoops = 0;
+ for (Loop *L : LI) {
+ // Ignore loops that will not be executed
+ if (DeadBlocks.count(L->getHeader()))
+ continue;
+ NumLoops++;
+ }
+ Cost += NumLoops * InlineConstants::CallPenalty;
+ }
// We applied the maximum possible vector bonus at the beginning. Now,
// subtract the excess bonus, if any, from the Threshold before
@@ -1961,7 +2004,7 @@ InlineCost llvm::getInlineCost(
// Cannot inline indirect calls.
if (!Callee)
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("indirect call");
// Never inline calls with byval arguments that does not have the alloca
// address space. Since byval arguments can be replaced with a copy to an
@@ -1973,54 +2016,59 @@ InlineCost llvm::getInlineCost(
if (CS.isByValArgument(I)) {
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
if (PTy->getAddressSpace() != AllocaAS)
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("byval arguments without alloca"
+ " address space");
}
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
if (CS.hasFnAttr(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
- return llvm::InlineCost::getAlways();
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getAlways("always inline attribute");
+ return llvm::InlineCost::getNever("inapplicable always inline attribute");
}
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
Function *Caller = CS.getCaller();
if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI))
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("conflicting attributes");
// Don't inline this call if the caller has the optnone attribute.
if (Caller->hasFnAttribute(Attribute::OptimizeNone))
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("optnone attribute");
// Don't inline a function that treats null pointer as valid into a caller
// that does not have this attribute.
if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined())
- return llvm::InlineCost::getNever();
+ return llvm::InlineCost::getNever("nullptr definitions incompatible");
+
+ // Don't inline functions which can be interposed at link-time.
+ if (Callee->isInterposable())
+ return llvm::InlineCost::getNever("interposable");
+
+ // Don't inline functions marked noinline.
+ if (Callee->hasFnAttribute(Attribute::NoInline))
+ return llvm::InlineCost::getNever("noinline function attribute");
- // Don't inline functions which can be interposed at link-time. Don't inline
- // functions marked noinline or call sites marked noinline.
- // Note: inlining non-exact non-interposable functions is fine, since we know
- // we have *a* correct implementation of the source level function.
- if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
- CS.isNoInline())
- return llvm::InlineCost::getNever();
+ // Don't inline call sites marked noinline.
+ if (CS.isNoInline())
+ return llvm::InlineCost::getNever("noinline call site attribute");
LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "... (caller:" << Caller->getName() << ")\n");
CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS,
Params);
- bool ShouldInline = CA.analyzeCall(CS);
+ InlineResult ShouldInline = CA.analyzeCall(CS);
LLVM_DEBUG(CA.dump());
// Check if there was a reason to force inlining or no inlining.
if (!ShouldInline && CA.getCost() < CA.getThreshold())
- return InlineCost::getNever();
+ return InlineCost::getNever(ShouldInline.message);
if (ShouldInline && CA.getCost() >= CA.getThreshold())
- return InlineCost::getAlways();
+ return InlineCost::getAlways("empty function");
return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
@@ -2058,9 +2106,8 @@ bool llvm::isInlineViable(Function &F) {
// Disallow inlining functions that call @llvm.localescape. Doing this
// correctly would require major changes to the inliner.
case llvm::Intrinsic::localescape:
- // Disallow inlining of functions that access VarArgs.
+ // Disallow inlining of functions that initialize VarArgs with va_start.
case llvm::Intrinsic::vastart:
- case llvm::Intrinsic::vaend:
return false;
}
}
diff --git a/lib/Analysis/InstructionPrecedenceTracking.cpp b/lib/Analysis/InstructionPrecedenceTracking.cpp
new file mode 100644
index 000000000000..816126f407ca
--- /dev/null
+++ b/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -0,0 +1,157 @@
+//===-- InstructionPrecedenceTracking.cpp -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implements a class that is able to define some instructions as "special"
+// (e.g. as having implicit control flow, or writing memory, or having another
+// interesting property) and then efficiently answers queries of the following
+// types:
+// 1. Are there any special instructions in the block of interest?
+// 2. Return the first of the special instructions in the given block;
+// 3. Check if the given instruction is preceded by the first special
+//    instruction in the same block.
+// The class provides caching that allows these queries to be answered quickly.
+// The user must make sure that the cached data is invalidated properly
+// whenever the contents of a tracked block change.
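+//
+// A minimal usage sketch (illustrative only; ICF is assumed to be a tracking
+// object already built for the current function):
+//   if (const Instruction *First = ICF.getFirstSpecialInstruction(BB))
+//     ... // instructions after First in BB are not guaranteed to execute.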
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+using namespace llvm;
+
+#ifndef NDEBUG
+static cl::opt<bool> ExpensiveAsserts(
+ "ipt-expensive-asserts",
+ cl::desc("Perform expensive assert validation on every query to Instruction"
+ " Precedence Tracking"),
+ cl::init(false), cl::Hidden);
+#endif
+
+const Instruction *InstructionPrecedenceTracking::getFirstSpecialInstruction(
+ const BasicBlock *BB) {
+#ifndef NDEBUG
+  // If there is a bug related to an invalid cache, turn on ExpensiveAsserts to
+ // catch this situation as early as possible.
+ if (ExpensiveAsserts)
+ validateAll();
+ else
+ validate(BB);
+#endif
+
+ if (FirstSpecialInsts.find(BB) == FirstSpecialInsts.end()) {
+ fill(BB);
+ assert(FirstSpecialInsts.find(BB) != FirstSpecialInsts.end() && "Must be!");
+ }
+ return FirstSpecialInsts[BB];
+}
+
+bool InstructionPrecedenceTracking::hasSpecialInstructions(
+ const BasicBlock *BB) {
+ return getFirstSpecialInstruction(BB) != nullptr;
+}
+
+bool InstructionPrecedenceTracking::isPreceededBySpecialInstruction(
+ const Instruction *Insn) {
+ const Instruction *MaybeFirstSpecial =
+ getFirstSpecialInstruction(Insn->getParent());
+ return MaybeFirstSpecial && OI.dominates(MaybeFirstSpecial, Insn);
+}
+
+void InstructionPrecedenceTracking::fill(const BasicBlock *BB) {
+ FirstSpecialInsts.erase(BB);
+ for (auto &I : *BB)
+ if (isSpecialInstruction(&I)) {
+ FirstSpecialInsts[BB] = &I;
+ return;
+ }
+
+ // Mark this block as having no special instructions.
+ FirstSpecialInsts[BB] = nullptr;
+}
+
+#ifndef NDEBUG
+void InstructionPrecedenceTracking::validate(const BasicBlock *BB) const {
+ auto It = FirstSpecialInsts.find(BB);
+ // Bail if we don't have anything cached for this block.
+ if (It == FirstSpecialInsts.end())
+ return;
+
+ for (const Instruction &Insn : *BB)
+ if (isSpecialInstruction(&Insn)) {
+ assert(It->second == &Insn &&
+ "Cached first special instruction is wrong!");
+ return;
+ }
+
+ assert(It->second == nullptr &&
+ "Block is marked as having special instructions but in fact it has "
+ "none!");
+}
+
+void InstructionPrecedenceTracking::validateAll() const {
+ // Check that for every known block the cached value is correct.
+ for (auto &It : FirstSpecialInsts)
+ validate(It.first);
+}
+#endif
+
+void InstructionPrecedenceTracking::insertInstructionTo(const Instruction *Inst,
+ const BasicBlock *BB) {
+ if (isSpecialInstruction(Inst))
+ FirstSpecialInsts.erase(BB);
+ OI.invalidateBlock(BB);
+}
+
+void InstructionPrecedenceTracking::removeInstruction(const Instruction *Inst) {
+ if (isSpecialInstruction(Inst))
+ FirstSpecialInsts.erase(Inst->getParent());
+ OI.invalidateBlock(Inst->getParent());
+}
+
+void InstructionPrecedenceTracking::clear() {
+ for (auto It : FirstSpecialInsts)
+ OI.invalidateBlock(It.first);
+ FirstSpecialInsts.clear();
+#ifndef NDEBUG
+ // The map should be valid after clearing (at least empty).
+ validateAll();
+#endif
+}
+
+bool ImplicitControlFlowTracking::isSpecialInstruction(
+ const Instruction *Insn) const {
+  // If a block's instruction doesn't always pass control to its successor
+  // instruction, mark the block as having implicit control flow. We use this
+  // to avoid incorrect assumptions of the sort "if A is executed and B
+  // post-dominates A, then B is also executed". This is not true if there is
+  // an implicit control flow instruction (e.g. a guard) between them.
+ //
+ // TODO: Currently, isGuaranteedToTransferExecutionToSuccessor returns false
+ // for volatile stores and loads because they can trap. The discussion on
+ // whether or not it is correct is still ongoing. We might want to get rid
+  // of this logic in the future. Anyway, trapping instructions shouldn't
+ // introduce implicit control flow, so we explicitly allow them here. This
+ // must be removed once isGuaranteedToTransferExecutionToSuccessor is fixed.
+ if (isGuaranteedToTransferExecutionToSuccessor(Insn))
+ return false;
+ if (isa<LoadInst>(Insn)) {
+ assert(cast<LoadInst>(Insn)->isVolatile() &&
+ "Non-volatile load should transfer execution to successor!");
+ return false;
+ }
+ if (isa<StoreInst>(Insn)) {
+ assert(cast<StoreInst>(Insn)->isVolatile() &&
+ "Non-volatile store should transfer execution to successor!");
+ return false;
+ }
+ return true;
+}
+
+bool MemoryWriteTracking::isSpecialInstruction(
+ const Instruction *Insn) const {
+ return Insn->mayWriteToMemory();
+}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 7fc7c15a0c25..ccf907c144f0 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -861,8 +861,10 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// (X / Y) * Y -> X if the division is exact.
Value *X = nullptr;
- if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
- match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y)
+ if (Q.IIQ.UseInstrInfo &&
+ (match(Op0,
+ m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
+ match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0)))))) // Y * (X / Y)
return X;
// i1 mul -> and.
@@ -1035,8 +1037,8 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (match(Op0, m_c_Mul(m_Value(X), m_Specific(Op1)))) {
auto *Mul = cast<OverflowingBinaryOperator>(Op0);
// If the Mul does not overflow, then we are good to go.
- if ((IsSigned && Mul->hasNoSignedWrap()) ||
- (!IsSigned && Mul->hasNoUnsignedWrap()))
+ if ((IsSigned && Q.IIQ.hasNoSignedWrap(Mul)) ||
+ (!IsSigned && Q.IIQ.hasNoUnsignedWrap(Mul)))
return X;
// If X has the form X = A / Y, then X * Y cannot overflow.
if ((IsSigned && match(X, m_SDiv(m_Value(), m_Specific(Op1)))) ||
@@ -1094,10 +1096,11 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
return Op0;
// (X << Y) % X -> 0
- if ((Opcode == Instruction::SRem &&
- match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) ||
- (Opcode == Instruction::URem &&
- match(Op0, m_NUWShl(m_Specific(Op1), m_Value()))))
+ if (Q.IIQ.UseInstrInfo &&
+ ((Opcode == Instruction::SRem &&
+ match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) ||
+ (Opcode == Instruction::URem &&
+ match(Op0, m_NUWShl(m_Specific(Op1), m_Value())))))
return Constant::getNullValue(Op0->getType());
// If the operation is with the result of a select instruction, check whether
@@ -1295,7 +1298,8 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// (X >> A) << A -> X
Value *X;
- if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
+ if (Q.IIQ.UseInstrInfo &&
+ match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
return X;
// shl nuw i8 C, %x -> C iff C has sign bit set.
@@ -1338,7 +1342,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
const unsigned Width = Op0->getType()->getScalarSizeInBits();
const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
- if (EffWidthY <= ShRAmt->getZExtValue())
+ if (ShRAmt->uge(EffWidthY))
return X;
}
@@ -1365,7 +1369,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
// (X << A) >> A -> X
Value *X;
- if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
+ if (Q.IIQ.UseInstrInfo && match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
return X;
// Arithmetic shifting an all-sign-bit value is a no-op.
@@ -1552,7 +1556,8 @@ static Value *simplifyAndOrOfICmpsWithZero(ICmpInst *Cmp0, ICmpInst *Cmp1,
return nullptr;
}
-static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
// (icmp (add V, C0), C1) & (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
@@ -1563,13 +1568,13 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
return nullptr;
- auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ auto *AddInst = cast<OverflowingBinaryOperator>(Op0->getOperand(0));
if (AddInst->getOperand(1) != Op1->getOperand(1))
return nullptr;
Type *ITy = Op0->getType();
- bool isNSW = AddInst->hasNoSignedWrap();
- bool isNUW = AddInst->hasNoUnsignedWrap();
+ bool isNSW = IIQ.hasNoSignedWrap(AddInst);
+ bool isNUW = IIQ.hasNoUnsignedWrap(AddInst);
const APInt Delta = *C1 - *C0;
if (C0->isStrictlyPositive()) {
@@ -1598,7 +1603,8 @@ static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
return X;
if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true))
@@ -1615,15 +1621,16 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, IIQ))
return X;
- if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0))
+ if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, IIQ))
return X;
return nullptr;
}
-static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
// (icmp (add V, C0), C1) | (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
@@ -1639,8 +1646,8 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
Type *ITy = Op0->getType();
- bool isNSW = AddInst->hasNoSignedWrap();
- bool isNUW = AddInst->hasNoUnsignedWrap();
+ bool isNSW = IIQ.hasNoSignedWrap(AddInst);
+ bool isNUW = IIQ.hasNoUnsignedWrap(AddInst);
const APInt Delta = *C1 - *C0;
if (C0->isStrictlyPositive()) {
@@ -1669,7 +1676,8 @@ static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
+ const InstrInfoQuery &IIQ) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
return X;
if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false))
@@ -1686,15 +1694,16 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, IIQ))
return X;
- if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0))
+ if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, IIQ))
return X;
return nullptr;
}
-static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
+static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI,
+ FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
if (LHS0->getType() != RHS0->getType())
@@ -1711,8 +1720,8 @@ static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
// (fcmp uno NNAN, X) | (fcmp uno Y, X) --> fcmp uno Y, X
// (fcmp uno X, NNAN) | (fcmp uno X, Y) --> fcmp uno X, Y
// (fcmp uno X, NNAN) | (fcmp uno Y, X) --> fcmp uno Y, X
- if ((isKnownNeverNaN(LHS0) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
- (isKnownNeverNaN(LHS1) && (LHS0 == RHS0 || LHS0 == RHS1)))
+ if ((isKnownNeverNaN(LHS0, TLI) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
+ (isKnownNeverNaN(LHS1, TLI) && (LHS0 == RHS0 || LHS0 == RHS1)))
return RHS;
// (fcmp ord X, Y) & (fcmp ord NNAN, X) --> fcmp ord X, Y
@@ -1723,15 +1732,16 @@ static Value *simplifyAndOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
// (fcmp uno Y, X) | (fcmp uno NNAN, X) --> fcmp uno Y, X
// (fcmp uno X, Y) | (fcmp uno X, NNAN) --> fcmp uno X, Y
// (fcmp uno Y, X) | (fcmp uno X, NNAN) --> fcmp uno Y, X
- if ((isKnownNeverNaN(RHS0) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
- (isKnownNeverNaN(RHS1) && (RHS0 == LHS0 || RHS0 == LHS1)))
+ if ((isKnownNeverNaN(RHS0, TLI) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
+ (isKnownNeverNaN(RHS1, TLI) && (RHS0 == LHS0 || RHS0 == LHS1)))
return LHS;
}
return nullptr;
}
-static Value *simplifyAndOrOfCmps(Value *Op0, Value *Op1, bool IsAnd) {
+static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
+ Value *Op0, Value *Op1, bool IsAnd) {
// Look through casts of the 'and' operands to find compares.
auto *Cast0 = dyn_cast<CastInst>(Op0);
auto *Cast1 = dyn_cast<CastInst>(Op1);
@@ -1745,13 +1755,13 @@ static Value *simplifyAndOrOfCmps(Value *Op0, Value *Op1, bool IsAnd) {
auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
if (ICmp0 && ICmp1)
- V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1) :
- simplifyOrOfICmps(ICmp0, ICmp1);
+ V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q.IIQ)
+ : simplifyOrOfICmps(ICmp0, ICmp1, Q.IIQ);
auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
if (FCmp0 && FCmp1)
- V = simplifyAndOrOfFCmps(FCmp0, FCmp1, IsAnd);
+ V = simplifyAndOrOfFCmps(Q.TLI, FCmp0, FCmp1, IsAnd);
if (!V)
return nullptr;
@@ -1831,7 +1841,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
return Op1;
}
- if (Value *V = simplifyAndOrOfCmps(Op0, Op1, true))
+ if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
return V;
// Try some generic simplifications for associative operations.
@@ -1863,6 +1873,40 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
MaxRecurse))
return V;
+ // Assuming the effective width of Y is not larger than A, i.e. all bits
+ // from X and Y are disjoint in (X << A) | Y,
+ // if the mask of this AND op covers all bits of X or Y, while it covers
+ // no bits from the other, we can bypass this AND op. E.g.,
+ // ((X << A) | Y) & Mask -> Y,
+ // if Mask = ((1 << effective_width_of(Y)) - 1)
+ // ((X << A) | Y) & Mask -> X << A,
+ // if Mask = ((1 << effective_width_of(X)) - 1) << A
+ // SimplifyDemandedBits in InstCombine can optimize the general case.
+ // This pattern aims to help other passes for a common case.
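+  // For example, a hedged instance of the pattern above with i32 values and a
+  // nuw shift: ((X << 16) | Y) & 0xFFFF -> Y when Y is known to fit in the low
+  // 16 bits.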
+ Value *Y, *XShifted;
+ if (match(Op1, m_APInt(Mask)) &&
+ match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
+ m_Value(XShifted)),
+ m_Value(Y)))) {
+ const unsigned Width = Op0->getType()->getScalarSizeInBits();
+ const unsigned ShftCnt = ShAmt->getLimitedValue(Width);
+ const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ if (EffWidthY <= ShftCnt) {
+ const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
+ Q.DT);
+ const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
+ const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
+ const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
+ // If the mask is extracting all bits from X or Y as is, we can skip
+ // this AND op.
+ if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask))
+ return Y;
+ if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask))
+ return XShifted;
+ }
+ }
+
return nullptr;
}
@@ -1947,7 +1991,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
return Op0;
- if (Value *V = simplifyAndOrOfCmps(Op0, Op1, false))
+ if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
// Try some generic simplifications for associative operations.
@@ -2108,13 +2152,15 @@ static Constant *
computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, CmpInst::Predicate Pred,
AssumptionCache *AC, const Instruction *CxtI,
- Value *LHS, Value *RHS) {
+ const InstrInfoQuery &IIQ, Value *LHS, Value *RHS) {
// First, skip past any trivial no-ops.
LHS = LHS->stripPointerCasts();
RHS = RHS->stripPointerCasts();
// A non-null pointer is not equal to a null pointer.
- if (llvm::isKnownNonZero(LHS, DL) && isa<ConstantPointerNull>(RHS) &&
+ if (llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr,
+ IIQ.UseInstrInfo) &&
+ isa<ConstantPointerNull>(RHS) &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
@@ -2379,12 +2425,12 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT: {
@@ -2429,17 +2475,18 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
/// Many binary operators with a constant operand have an easy-to-compute
/// range of outputs. This can be used to fold a comparison to always true or
/// always false.
-static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
+static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper,
+ const InstrInfoQuery &IIQ) {
unsigned Width = Lower.getBitWidth();
const APInt *C;
switch (BO.getOpcode()) {
case Instruction::Add:
if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
// FIXME: If we have both nuw and nsw, we should reduce the range further.
- if (BO.hasNoUnsignedWrap()) {
+ if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
// 'add nuw x, C' produces [C, UINT_MAX].
Lower = *C;
- } else if (BO.hasNoSignedWrap()) {
+ } else if (IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
if (C->isNegative()) {
// 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
Lower = APInt::getSignedMinValue(Width);
@@ -2472,7 +2519,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
} else if (match(BO.getOperand(0), m_APInt(C))) {
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && BO.isExact())
+ if (!C->isNullValue() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
if (C->isNegative()) {
// 'ashr C, x' produces [C, C >> (Width-1)]
@@ -2493,7 +2540,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
} else if (match(BO.getOperand(0), m_APInt(C))) {
// 'lshr C, x' produces [C >> (Width-1), C].
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && BO.isExact())
+ if (!C->isNullValue() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
Lower = C->lshr(ShiftAmount);
Upper = *C + 1;
@@ -2502,7 +2549,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
case Instruction::Shl:
if (match(BO.getOperand(0), m_APInt(C))) {
- if (BO.hasNoUnsignedWrap()) {
+ if (IIQ.hasNoUnsignedWrap(&BO)) {
// 'shl nuw C, x' produces [C, C << CLZ(C)]
Lower = *C;
Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
@@ -2583,8 +2630,72 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
}
}
+/// Some intrinsics with a constant operand have an easy-to-compute range of
+/// outputs. This can be used to fold a comparison to always true or always
+/// false.
+static void setLimitsForIntrinsic(IntrinsicInst &II, APInt &Lower,
+ APInt &Upper) {
+ unsigned Width = Lower.getBitWidth();
+ const APInt *C;
+ switch (II.getIntrinsicID()) {
+ case Intrinsic::uadd_sat:
+ // uadd.sat(x, C) produces [C, UINT_MAX].
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C)))
+ Lower = *C;
+ break;
+ case Intrinsic::sadd_sat:
+ if (match(II.getOperand(0), m_APInt(C)) ||
+ match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+ // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+ } else {
+ // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) + *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ }
+ break;
+ case Intrinsic::usub_sat:
+ // usub.sat(C, x) produces [0, C].
+ if (match(II.getOperand(0), m_APInt(C)))
+ Upper = *C + 1;
+ // usub.sat(x, C) produces [0, UINT_MAX - C].
+ else if (match(II.getOperand(1), m_APInt(C)))
+ Upper = APInt::getMaxValue(Width) - *C + 1;
+ break;
+ case Intrinsic::ssub_sat:
+ if (match(II.getOperand(0), m_APInt(C))) {
+ if (C->isNegative()) {
+ // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = *C - APInt::getSignedMinValue(Width) + 1;
+ } else {
+ // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
+ Lower = *C - APInt::getSignedMaxValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ } else if (match(II.getOperand(1), m_APInt(C))) {
+ if (C->isNegative()) {
+        // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) - *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ } else {
+ // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) - *C + 1;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
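To see the uadd.sat bound above in action, here is a minimal standalone sketch in plain C++ (not part of this patch; the helper name uadd_sat8 and the constant 42 are illustrative). It brute-forces the 8-bit case and confirms that uadd.sat(X, C) always lands in [C, UINT8_MAX], which is what lets a compare such as icmp ult (uadd.sat X, 42), 42 fold to false.

#include <cassert>
#include <cstdint>

// Saturating unsigned add on 8 bits, mirroring @llvm.uadd.sat.i8.
static uint8_t uadd_sat8(uint8_t X, uint8_t C) {
  unsigned Sum = unsigned(X) + unsigned(C);
  return Sum > 0xFF ? 0xFF : uint8_t(Sum);
}

int main() {
  const uint8_t C = 42; // illustrative constant operand
  for (unsigned X = 0; X <= 0xFF; ++X) {
    uint8_t R = uadd_sat8(uint8_t(X), C);
    // The range [C, UINT8_MAX] computed by setLimitsForIntrinsic holds, so
    // a compare like 'icmp ult (uadd.sat X, 42), 42' is always false.
    assert(R >= C);
  }
  return 0;
}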
+
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS) {
+ Value *RHS, const InstrInfoQuery &IIQ) {
Type *ITy = GetCompareTy(RHS); // The return type.
Value *X;
@@ -2615,13 +2726,15 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
APInt Lower = APInt(Width, 0);
APInt Upper = APInt(Width, 0);
if (auto *BO = dyn_cast<BinaryOperator>(LHS))
- setLimitsForBinOp(*BO, Lower, Upper);
+ setLimitsForBinOp(*BO, Lower, Upper, IIQ);
+ else if (auto *II = dyn_cast<IntrinsicInst>(LHS))
+ setLimitsForIntrinsic(*II, Lower, Upper);
ConstantRange LHS_CR =
Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
if (auto *I = dyn_cast<Instruction>(LHS))
- if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ if (auto *Ranges = IIQ.getMetadata(I, LLVMContext::MD_range))
LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
if (!LHS_CR.isFullSet()) {
@@ -2654,16 +2767,20 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
B = LBO->getOperand(1);
NoLHSWrapProblem =
ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ (CmpInst::isUnsigned(Pred) &&
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO))) ||
+ (CmpInst::isSigned(Pred) &&
+ Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)));
}
if (RBO && RBO->getOpcode() == Instruction::Add) {
C = RBO->getOperand(0);
D = RBO->getOperand(1);
NoRHSWrapProblem =
ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ (CmpInst::isUnsigned(Pred) &&
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(RBO))) ||
+ (CmpInst::isSigned(Pred) &&
+ Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RBO)));
}
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
@@ -2881,7 +2998,8 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// - The shift is nuw, we can't shift out the one bit.
// - CI2 is one
// - CI isn't zero
- if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() ||
+ if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
+ Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
CI2Val->isOneValue() || !CI->isZero()) {
if (Pred == ICmpInst::ICMP_EQ)
return ConstantInt::getFalse(RHS->getContext());
@@ -2905,29 +3023,31 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
break;
case Instruction::UDiv:
case Instruction::LShr:
- if (ICmpInst::isSigned(Pred) || !LBO->isExact() || !RBO->isExact())
+ if (ICmpInst::isSigned(Pred) || !Q.IIQ.isExact(LBO) ||
+ !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::SDiv:
- if (!ICmpInst::isEquality(Pred) || !LBO->isExact() || !RBO->isExact())
+ if (!ICmpInst::isEquality(Pred) || !Q.IIQ.isExact(LBO) ||
+ !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::AShr:
- if (!LBO->isExact() || !RBO->isExact())
+ if (!Q.IIQ.isExact(LBO) || !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::Shl: {
- bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
- bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+ bool NUW = Q.IIQ.hasNoUnsignedWrap(LBO) && Q.IIQ.hasNoUnsignedWrap(RBO);
+ bool NSW = Q.IIQ.hasNoSignedWrap(LBO) && Q.IIQ.hasNoSignedWrap(RBO);
if (!NUW && !NSW)
break;
if (!NSW && ICmpInst::isSigned(Pred))
@@ -2942,6 +3062,44 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
+static Value *simplifyICmpWithAbsNabs(CmpInst::Predicate Pred, Value *Op0,
+ Value *Op1) {
+ // We need a comparison with a constant.
+ const APInt *C;
+ if (!match(Op1, m_APInt(C)))
+ return nullptr;
+
+ // matchSelectPattern returns the negation part of an abs pattern in SP1.
+ // If the negate has an NSW flag, abs(INT_MIN) is undefined. Without that
+ // constraint, we can't make a contiguous range for the result of abs.
+ ICmpInst::Predicate AbsPred = ICmpInst::BAD_ICMP_PREDICATE;
+ Value *SP0, *SP1;
+ SelectPatternFlavor SPF = matchSelectPattern(Op0, SP0, SP1).Flavor;
+ if (SPF == SelectPatternFlavor::SPF_ABS &&
+ cast<Instruction>(SP1)->hasNoSignedWrap())
+ // The result of abs(X) is >= 0 (with nsw).
+ AbsPred = ICmpInst::ICMP_SGE;
+ if (SPF == SelectPatternFlavor::SPF_NABS)
+ // The result of -abs(X) is <= 0.
+ AbsPred = ICmpInst::ICMP_SLE;
+
+ if (AbsPred == ICmpInst::BAD_ICMP_PREDICATE)
+ return nullptr;
+
+ // If there is no intersection between abs/nabs and the range of this icmp,
+ // the icmp must be false. If the abs/nabs range is a subset of the icmp
+ // range, the icmp must be true.
+ APInt Zero = APInt::getNullValue(C->getBitWidth());
+ ConstantRange AbsRange = ConstantRange::makeExactICmpRegion(AbsPred, Zero);
+ ConstantRange CmpRange = ConstantRange::makeExactICmpRegion(Pred, *C);
+ if (AbsRange.intersectWith(CmpRange).isEmptySet())
+ return getFalse(GetCompareTy(Op0));
+ if (CmpRange.contains(AbsRange))
+ return getTrue(GetCompareTy(Op0));
+
+ return nullptr;
+}
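As a quick sanity check of the abs/nabs range reasoning, the following plain C++ sketch (illustrative only, not LLVM code) models the nsw requirement by skipping INT8_MIN and brute-forces two sample folds over the abs range [0, INT8_MAX].

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  // The nsw requirement is modelled by never feeding INT8_MIN to abs, so
  // the result range is [0, INT8_MAX].
  for (int X = INT8_MIN + 1; X <= INT8_MAX; ++X) {
    int A = std::abs(X);
    assert(!(A < -1)); // icmp slt (abs nsw X), -1: ranges disjoint -> false
    assert(A > -1);    // icmp sgt (abs nsw X), -1: range contained -> true
  }
  return 0;
}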
+
/// Simplify integer comparisons where at least one operand of the compare
/// matches an integer min/max idiom.
static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
@@ -3175,7 +3333,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q))
return V;
- if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS))
+ if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS, Q.IIQ))
return V;
// If both operands have range metadata, use the metadata
@@ -3184,8 +3342,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
auto RHS_Instr = cast<Instruction>(RHS);
auto LHS_Instr = cast<Instruction>(LHS);
- if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
- LHS_Instr->getMetadata(LLVMContext::MD_range)) {
+ if (Q.IIQ.getMetadata(RHS_Instr, LLVMContext::MD_range) &&
+ Q.IIQ.getMetadata(LHS_Instr, LLVMContext::MD_range)) {
auto RHS_CR = getConstantRangeFromMetadata(
*RHS_Instr->getMetadata(LLVMContext::MD_range));
auto LHS_CR = getConstantRangeFromMetadata(
@@ -3363,7 +3521,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// icmp eq|ne X, Y -> false|true if X != Y
if (ICmpInst::isEquality(Pred) &&
- isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
+ isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo)) {
return Pred == ICmpInst::ICMP_NE ? getTrue(ITy) : getFalse(ITy);
}
@@ -3373,11 +3531,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
return V;
+ if (Value *V = simplifyICmpWithAbsNabs(Pred, LHS, RHS))
+ return V;
+
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available..
if (LHS->getType()->isPointerTy())
- if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI, LHS,
- RHS))
+ if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI,
+ Q.IIQ, LHS, RHS))
return C;
if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS))
if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS))
@@ -3386,7 +3547,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) ==
Q.DL.getTypeSizeInBits(CRHS->getType()))
if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.AC, Q.CxtI,
- CLHS->getPointerOperand(),
+ Q.IIQ, CLHS->getPointerOperand(),
CRHS->getPointerOperand()))
return C;
@@ -3457,13 +3618,11 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Pred == FCmpInst::FCMP_TRUE)
return getTrue(RetTy);
- // UNO/ORD predicates can be trivially folded if NaNs are ignored.
- if (FMF.noNaNs()) {
- if (Pred == FCmpInst::FCMP_UNO)
- return getFalse(RetTy);
- if (Pred == FCmpInst::FCMP_ORD)
- return getTrue(RetTy);
- }
+ // Fold (un)ordered comparison if we can determine there are no NaNs.
+ if (Pred == FCmpInst::FCMP_UNO || Pred == FCmpInst::FCMP_ORD)
+ if (FMF.noNaNs() ||
+ (isKnownNeverNaN(LHS, Q.TLI) && isKnownNeverNaN(RHS, Q.TLI)))
+ return ConstantInt::get(RetTy, Pred == FCmpInst::FCMP_ORD);
// NaN is unordered; NaN is not ordered.
assert((FCmpInst::isOrdered(Pred) || FCmpInst::isUnordered(Pred)) &&
@@ -3518,12 +3677,19 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
if (C->isZero()) {
switch (Pred) {
+ case FCmpInst::FCMP_OGE:
+ if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return getTrue(RetTy);
+ break;
case FCmpInst::FCMP_UGE:
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
return getTrue(RetTy);
break;
+ case FCmpInst::FCMP_ULT:
+ if (FMF.noNaNs() && CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ return getFalse(RetTy);
+ break;
case FCmpInst::FCMP_OLT:
- // X < 0
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
return getFalse(RetTy);
break;
@@ -3600,11 +3766,10 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
//
// We can't replace %sel with %add unless we strip away the flags.
if (isa<OverflowingBinaryOperator>(B))
- if (B->hasNoSignedWrap() || B->hasNoUnsignedWrap())
- return nullptr;
- if (isa<PossiblyExactOperator>(B))
- if (B->isExact())
+ if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B))
return nullptr;
+ if (isa<PossiblyExactOperator>(B) && Q.IIQ.isExact(B))
+ return nullptr;
if (MaxRecurse) {
if (B->getOperand(0) == Op)
@@ -3738,6 +3903,28 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
Pred == ICmpInst::ICMP_EQ))
return V;
+
+ // Test for zero-shift-guard-ops around funnel shifts. These are used to
+ // avoid UB from oversized shifts in raw IR rotate patterns, but the
+ // intrinsics do not have that problem.
+ Value *ShAmt;
+ auto isFsh = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(),
+ m_Value(ShAmt)),
+ m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X),
+ m_Value(ShAmt)));
+ // (ShAmt != 0) ? fshl(X, *, ShAmt) : X --> fshl(X, *, ShAmt)
+ // (ShAmt != 0) ? fshr(*, X, ShAmt) : X --> fshr(*, X, ShAmt)
+ // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
+ // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
+ if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
+ return Pred == ICmpInst::ICMP_NE ? TrueVal : X;
+
+ // (ShAmt == 0) ? X : fshl(X, *, ShAmt) --> fshl(X, *, ShAmt)
+ // (ShAmt == 0) ? X : fshr(*, X, ShAmt) --> fshr(*, X, ShAmt)
+ // (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X
+ // (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X
+ if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt)
+ return Pred == ICmpInst::ICMP_EQ ? FalseVal : X;
}
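For readers unfamiliar with the zero-shift-guard idiom being folded away here, this small standalone C++ sketch (not part of the patch) shows the raw rotate pattern that needs the guard and a model of fshl(X, X, S), which does not.

#include <cassert>
#include <cstdint>

// Raw IR rotate pattern: the (S == 0) guard avoids the oversized shift
// X >> (32 - 0), which would be undefined.
static uint32_t rotl_raw_pattern(uint32_t X, uint32_t S) {
  return S == 0 ? X : (X << S) | (X >> (32 - S));
}

// C model of fshl(X, X, S): the intrinsic reduces the amount modulo the bit
// width, so no separate zero guard is required at the IR level. The branch
// below only keeps the C model itself free of oversized shifts.
static uint32_t rotl_fshl_model(uint32_t X, uint32_t S) {
  S %= 32;
  return S == 0 ? X : (X << S) | (X >> (32 - S));
}

int main() {
  const uint32_t X = 0x12345678u;
  for (uint32_t S = 0; S < 32; ++S)
    assert(rotl_raw_pattern(X, S) == rotl_fshl_model(X, S));
  assert(rotl_fshl_model(X, 32) == X); // wrapped amount, still no guard
  return 0;
}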
// Check for other compares that behave like bit test.
@@ -3775,6 +3962,34 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
return nullptr;
}
+/// Try to simplify a select instruction when its condition operand is a
+/// floating-point comparison.
+static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F) {
+ FCmpInst::Predicate Pred;
+ if (!match(Cond, m_FCmp(Pred, m_Specific(T), m_Specific(F))) &&
+ !match(Cond, m_FCmp(Pred, m_Specific(F), m_Specific(T))))
+ return nullptr;
+
+ // TODO: The transform may not be valid with -0.0. An incomplete way of
+ // testing for that possibility is to check if at least one operand is a
+ // non-zero constant.
+ const APFloat *C;
+ if ((match(T, m_APFloat(C)) && C->isNonZero()) ||
+ (match(F, m_APFloat(C)) && C->isNonZero())) {
+ // (T == F) ? T : F --> F
+ // (F == T) ? T : F --> F
+ if (Pred == FCmpInst::FCMP_OEQ)
+ return F;
+
+ // (T != F) ? T : F --> T
+ // (F != T) ? T : F --> T
+ if (Pred == FCmpInst::FCMP_UNE)
+ return T;
+ }
+
+ return nullptr;
+}
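The non-zero-constant guard in simplifySelectWithFCmp exists because +0.0 and -0.0 compare equal yet differ in their sign bit. A short standalone C++ demonstration (illustrative only):

#include <cassert>
#include <cmath>

int main() {
  double T = +0.0, F = -0.0;
  // T == F is true, so folding '(T == F) ? T : F' to F would flip the sign
  // bit of the result; the select as written yields T (+0.0).
  double Sel = (T == F) ? T : F;
  assert(!std::signbit(Sel));
  assert(std::signbit(F));
  return 0;
}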
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -3811,9 +4026,16 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
return V;
+ if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal))
+ return V;
+
if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal))
return V;
+ Optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
+ if (Imp)
+ return *Imp ? TrueVal : FalseVal;
+
return nullptr;
}
@@ -4325,6 +4547,14 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0)))))
return ConstantFP::getNullValue(Op0->getType());
+ // (X - Y) + Y --> X
+ // Y + (X - Y) --> X
+ Value *X;
+ if (FMF.noSignedZeros() && FMF.allowReassoc() &&
+ (match(Op0, m_FSub(m_Value(X), m_Specific(Op1))) ||
+ match(Op1, m_FSub(m_Value(X), m_Specific(Op0)))))
+ return X;
+
return nullptr;
}
@@ -4362,6 +4592,13 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (FMF.noNaNs() && Op0 == Op1)
return Constant::getNullValue(Op0->getType());
+ // Y - (Y - X) --> X
+ // (X + Y) - Y --> X
+ if (FMF.noSignedZeros() && FMF.allowReassoc() &&
+ (match(Op1, m_FSub(m_Specific(Op0), m_Value(X))) ||
+ match(Op0, m_c_FAdd(m_Specific(Op1), m_Value(X)))))
+ return X;
+
return nullptr;
}
@@ -4442,10 +4679,8 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
// -X / X -> -1.0 and
// X / -X -> -1.0 are legal when NaNs are ignored.
// We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored.
- if ((BinaryOperator::isFNeg(Op0, /*IgnoreZeroSign=*/true) &&
- BinaryOperator::getFNegArgument(Op0) == Op1) ||
- (BinaryOperator::isFNeg(Op1, /*IgnoreZeroSign=*/true) &&
- BinaryOperator::getFNegArgument(Op1) == Op0))
+ if (match(Op0, m_FNegNSZ(m_Specific(Op1))) ||
+ match(Op1, m_FNegNSZ(m_Specific(Op0))))
return ConstantFP::get(Op0->getType(), -1.0);
}
@@ -4747,6 +4982,40 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
return Constant::getNullValue(ReturnType);
break;
+ case Intrinsic::uadd_sat:
+ // sat(MAX + X) -> MAX
+ // sat(X + MAX) -> MAX
+ if (match(Op0, m_AllOnes()) || match(Op1, m_AllOnes()))
+ return Constant::getAllOnesValue(ReturnType);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::sadd_sat:
+ // sat(X + undef) -> -1
+ // sat(undef + X) -> -1
+ // For unsigned: Assume undef is MAX, thus we saturate to MAX (-1).
+ // For signed: Assume undef is ~X, in which case X + ~X = -1.
+ if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return Constant::getAllOnesValue(ReturnType);
+
+ // X + 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+ // 0 + X -> X
+ if (match(Op0, m_Zero()))
+ return Op1;
+ break;
+ case Intrinsic::usub_sat:
+ // sat(0 - X) -> 0, sat(X - MAX) -> 0
+ if (match(Op0, m_Zero()) || match(Op1, m_AllOnes()))
+ return Constant::getNullValue(ReturnType);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::ssub_sat:
+ // X - X -> 0, X - undef -> 0, undef - X -> 0
+ if (Op0 == Op1 || match(Op0, m_Undef()) || match(Op1, m_Undef()))
+ return Constant::getNullValue(ReturnType);
+ // X - 0 -> X
+ if (match(Op1, m_Zero()))
+ return Op0;
+ break;
case Intrinsic::load_relative:
if (auto *C0 = dyn_cast<Constant>(Op0))
if (auto *C1 = dyn_cast<Constant>(Op1))
@@ -4764,10 +5033,51 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
break;
case Intrinsic::maxnum:
case Intrinsic::minnum:
- // If one argument is NaN, return the other argument.
- if (match(Op0, m_NaN())) return Op1;
- if (match(Op1, m_NaN())) return Op0;
+ case Intrinsic::maximum:
+ case Intrinsic::minimum: {
+ // If the arguments are the same, this is a no-op.
+ if (Op0 == Op1) return Op0;
+
+ // If one argument is undef, return the other argument.
+ if (match(Op0, m_Undef()))
+ return Op1;
+ if (match(Op1, m_Undef()))
+ return Op0;
+
+    // If one argument is NaN, return the other argument or NaN as appropriate.
+ bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
+ if (match(Op0, m_NaN()))
+ return PropagateNaN ? Op0 : Op1;
+ if (match(Op1, m_NaN()))
+ return PropagateNaN ? Op1 : Op0;
+
+ // Min/max of the same operation with common operand:
+ // m(m(X, Y)), X --> m(X, Y) (4 commuted variants)
+ if (auto *M0 = dyn_cast<IntrinsicInst>(Op0))
+ if (M0->getIntrinsicID() == IID &&
+ (M0->getOperand(0) == Op1 || M0->getOperand(1) == Op1))
+ return Op0;
+ if (auto *M1 = dyn_cast<IntrinsicInst>(Op1))
+ if (M1->getIntrinsicID() == IID &&
+ (M1->getOperand(0) == Op0 || M1->getOperand(1) == Op0))
+ return Op1;
+
+ // min(X, -Inf) --> -Inf (and commuted variant)
+ // max(X, +Inf) --> +Inf (and commuted variant)
+ bool UseNegInf = IID == Intrinsic::minnum || IID == Intrinsic::minimum;
+ const APFloat *C;
+ if ((match(Op0, m_APFloat(C)) && C->isInfinity() &&
+ C->isNegative() == UseNegInf) ||
+ (match(Op1, m_APFloat(C)) && C->isInfinity() &&
+ C->isNegative() == UseNegInf))
+ return ConstantFP::getInfinity(ReturnType, UseNegInf);
+
+ // TODO: minnum(nnan x, inf) -> x
+ // TODO: minnum(nnan ninf x, flt_max) -> x
+ // TODO: maxnum(nnan x, -inf) -> x
+ // TODO: maxnum(nnan ninf x, -flt_max) -> x
break;
+ }
default:
break;
}
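The PropagateNaN flag above captures the difference between the two intrinsic families: minnum/maxnum ignore a NaN operand, while minimum/maximum propagate it. A plain C++ sketch of that distinction (std::fmin models the minnum rule; the -0.0 < +0.0 ordering of llvm.minimum is not modelled here):

#include <cassert>
#include <cmath>

// Propagating minimum, matching the PropagateNaN behaviour of llvm.minimum.
static double minimum_like(double A, double B) {
  if (std::isnan(A)) return A;
  if (std::isnan(B)) return B;
  return A < B ? A : B;
}

int main() {
  double NaN = std::nan("");
  assert(std::fmin(NaN, 1.0) == 1.0);         // minnum rule: take the other operand
  assert(std::isnan(minimum_like(NaN, 1.0))); // minimum rule: NaN wins
  return 0;
}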
@@ -4802,7 +5112,16 @@ static Value *simplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
- Value *ShAmtArg = ArgBegin[2];
+ Value *Op0 = ArgBegin[0], *Op1 = ArgBegin[1], *ShAmtArg = ArgBegin[2];
+
+ // If both operands are undef, the result is undef.
+ if (match(Op0, m_Undef()) && match(Op1, m_Undef()))
+ return UndefValue::get(F->getReturnType());
+
+ // If shift amount is undef, assume it is zero.
+ if (match(ShAmtArg, m_Undef()))
+ return ArgBegin[IID == Intrinsic::fshl ? 0 : 1];
+
const APInt *ShAmtC;
if (match(ShAmtArg, m_APInt(ShAmtC))) {
// If there's effectively no shift, return the 1st arg or 2nd arg.
@@ -4889,18 +5208,20 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
I->getFastMathFlags(), Q);
break;
case Instruction::Add:
- Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
+ Result =
+ SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+ Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FSub:
Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
I->getFastMathFlags(), Q);
break;
case Instruction::Sub:
- Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
+ Result =
+ SimplifySubInst(I->getOperand(0), I->getOperand(1),
+ Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FMul:
Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
@@ -4930,17 +5251,18 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
I->getFastMathFlags(), Q);
break;
case Instruction::Shl:
- Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
+ Result =
+ SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+ Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::And:
Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), Q);
@@ -5066,7 +5388,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
- if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
+ if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
!I->mayHaveSideEffects())
I->eraseFromParent();
} else {
@@ -5095,7 +5417,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
- if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
+ if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
!I->mayHaveSideEffects())
I->eraseFromParent();
}
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
index e7751d32aab3..000fe5ddad54 100644
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -17,6 +17,7 @@
#include <queue>
namespace llvm {
+
template <class NodeTy, bool IsPostDom>
void IDFCalculator<NodeTy, IsPostDom>::calculate(
SmallVectorImpl<BasicBlock *> &PHIBlocks) {
@@ -61,29 +62,39 @@ void IDFCalculator<NodeTy, IsPostDom>::calculate(
BasicBlock *BB = Node->getBlock();
// Succ is the successor in the direction we are calculating IDF, so it is
// successor for IDF, and predecessor for Reverse IDF.
- for (auto *Succ : children<NodeTy>(BB)) {
+ auto DoWork = [&](BasicBlock *Succ) {
DomTreeNode *SuccNode = DT.getNode(Succ);
// Quickly skip all CFG edges that are also dominator tree edges instead
// of catching them below.
if (SuccNode->getIDom() == Node)
- continue;
+ return;
const unsigned SuccLevel = SuccNode->getLevel();
if (SuccLevel > RootLevel)
- continue;
+ return;
if (!VisitedPQ.insert(SuccNode).second)
- continue;
+ return;
BasicBlock *SuccBB = SuccNode->getBlock();
if (useLiveIn && !LiveInBlocks->count(SuccBB))
- continue;
+ return;
PHIBlocks.emplace_back(SuccBB);
if (!DefBlocks->count(SuccBB))
PQ.push(std::make_pair(
SuccNode, std::make_pair(SuccLevel, SuccNode->getDFSNumIn())));
+ };
+
+ if (GD) {
+ for (auto Pair : children<
+ std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, NodeTy>>(
+ {GD, BB}))
+ DoWork(Pair.second);
+ } else {
+ for (auto *Succ : children<NodeTy>(BB))
+ DoWork(Succ);
}
for (auto DomChild : *Node) {
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index b1d585bfc683..3f22ada803c9 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -619,7 +619,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(
// If the merge range is empty, then adding the edge didn't actually form any
// new cycles. We're done.
- if (MergeRange.begin() == MergeRange.end()) {
+ if (empty(MergeRange)) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN->setEdgeKind(TargetN, Edge::Call);
return false; // No new cycle.
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index ee0148e0d795..110c085d3f35 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -420,6 +421,8 @@ namespace {
BasicBlock *BB);
bool solveBlockValueSelect(ValueLatticeElement &BBLV, SelectInst *S,
BasicBlock *BB);
+ Optional<ConstantRange> getRangeForOperand(unsigned Op, Instruction *I,
+ BasicBlock *BB);
bool solveBlockValueBinaryOp(ValueLatticeElement &BBLV, BinaryOperator *BBI,
BasicBlock *BB);
bool solveBlockValueCast(ValueLatticeElement &BBLV, CastInst *CI,
@@ -634,8 +637,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(ValueLatticeElement &Res,
if (auto *CI = dyn_cast<CastInst>(BBI))
return solveBlockValueCast(Res, CI, BB);
- BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
- if (BO && isa<ConstantInt>(BO->getOperand(1)))
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
return solveBlockValueBinaryOp(Res, BO, BB);
}
@@ -951,6 +953,25 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV,
return true;
}
+Optional<ConstantRange> LazyValueInfoImpl::getRangeForOperand(unsigned Op,
+ Instruction *I,
+ BasicBlock *BB) {
+ if (!hasBlockValue(I->getOperand(Op), BB))
+ if (pushBlockValue(std::make_pair(BB, I->getOperand(Op))))
+ return None;
+
+ const unsigned OperandBitWidth =
+ DL.getTypeSizeInBits(I->getOperand(Op)->getType());
+ ConstantRange Range = ConstantRange(OperandBitWidth);
+ if (hasBlockValue(I->getOperand(Op), BB)) {
+ ValueLatticeElement Val = getBlockValue(I->getOperand(Op), BB);
+ intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I);
+ if (Val.isConstantRange())
+ Range = Val.getConstantRange();
+ }
+ return Range;
+}
+
bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
CastInst *CI,
BasicBlock *BB) {
@@ -981,21 +1002,11 @@ bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV,
// Figure out the range of the LHS. If that fails, we still apply the
// transfer rule on the full set since we may be able to locally infer
// interesting facts.
- if (!hasBlockValue(CI->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, CI->getOperand(0))))
- // More work to do before applying this transfer rule.
- return false;
-
- const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(CI->getOperand(0)->getType());
- ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(CI->getOperand(0), BB)) {
- ValueLatticeElement LHSVal = getBlockValue(CI->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal,
- CI);
- if (LHSVal.isConstantRange())
- LHSRange = LHSVal.getConstantRange();
- }
+ Optional<ConstantRange> LHSRes = getRangeForOperand(0, CI, BB);
+ if (!LHSRes.hasValue())
+ // More work to do before applying this transfer rule.
+ return false;
+ ConstantRange LHSRange = LHSRes.getValue();
const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();
@@ -1037,27 +1048,19 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(ValueLatticeElement &BBLV,
return true;
};
- // Figure out the range of the LHS. If that fails, use a conservative range,
- // but apply the transfer rule anyways. This lets us pick up facts from
- // expressions like "and i32 (call i32 @foo()), 32"
- if (!hasBlockValue(BO->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, BO->getOperand(0))))
- // More work to do before applying this transfer rule.
- return false;
+ // Figure out the ranges of the operands. If that fails, use a
+ // conservative range, but apply the transfer rule anyways. This
+ // lets us pick up facts from expressions like "and i32 (call i32
+ // @foo()), 32"
+ Optional<ConstantRange> LHSRes = getRangeForOperand(0, BO, BB);
+ Optional<ConstantRange> RHSRes = getRangeForOperand(1, BO, BB);
- const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(BO->getOperand(0)->getType());
- ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(BO->getOperand(0), BB)) {
- ValueLatticeElement LHSVal = getBlockValue(BO->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal,
- BO);
- if (LHSVal.isConstantRange())
- LHSRange = LHSVal.getConstantRange();
- }
+ if (!LHSRes.hasValue() || !RHSRes.hasValue())
+ // More work to do before applying this transfer rule.
+ return false;
- ConstantInt *RHS = cast<ConstantInt>(BO->getOperand(1));
- ConstantRange RHSRange = ConstantRange(RHS->getValue());
+ ConstantRange LHSRange = LHSRes.getValue();
+ ConstantRange RHSRange = RHSRes.getValue();
// NOTE: We're currently limited by the set of operations that ConstantRange
// can evaluate symbolically. Enhancing that set will allows us to analyze
diff --git a/lib/Analysis/LegacyDivergenceAnalysis.cpp b/lib/Analysis/LegacyDivergenceAnalysis.cpp
new file mode 100644
index 000000000000..5540859ebdda
--- /dev/null
+++ b/lib/Analysis/LegacyDivergenceAnalysis.cpp
@@ -0,0 +1,391 @@
+//===- LegacyDivergenceAnalysis.cpp - Legacy Divergence Analysis Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements divergence analysis which determines whether a branch
+// in a GPU program is divergent. It can help branch optimizations such as jump
+// threading and loop unswitching to make better decisions.
+//
+// GPU programs typically use the SIMD execution model, where multiple threads
+// in the same execution group have to execute in lock-step. Therefore, if the
+// code contains divergent branches (i.e., threads in a group do not agree on
+// which path of the branch to take), the group of threads has to execute all
+// the paths from that branch with different subsets of threads enabled until
+// they converge at the immediately post-dominating BB of the paths.
+//
+// Due to this execution model, some optimizations such as jump
+// threading and loop unswitching can unfortunately be harmful when performed on
+// divergent branches. Therefore, an analysis that computes which branches in a
+// GPU program are divergent can help the compiler to selectively run these
+// optimizations.
+//
+// This file defines divergence analysis which computes a conservative but
+// non-trivial approximation of all divergent branches in a GPU program. It
+// partially implements the approach described in
+//
+// Divergence Analysis
+// Sampaio, Souza, Collange, Pereira
+// TOPLAS '13
+//
+// The divergence analysis identifies the sources of divergence (e.g., special
+// variables that hold the thread ID), and recursively marks variables that are
+// data or sync dependent on a source of divergence as divergent.
+//
+// While data dependency is a well-known concept, the notion of sync dependency
+// is worth more explanation. Sync dependence characterizes the control flow
+// aspect of the propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// The current implementation has the following limitations:
+// 1. intra-procedural. It conservatively considers the arguments of a
+// non-kernel-entry function and the return value of a function call as
+// divergent.
+// 2. memory as black box. It conservatively considers values loaded from
+// generic or local address as divergent. This can be improved by leveraging
+// pointer analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "divergence"
+
+// transparently use the GPUDivergenceAnalysis
+static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false),
+ cl::Hidden,
+ cl::desc("turn the LegacyDivergenceAnalysis into "
+ "a wrapper for GPUDivergenceAnalysis"));
+
+namespace {
+
+class DivergencePropagator {
+public:
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+ PostDominatorTree &PDT, DenseSet<const Value *> &DV)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ void populateWithSourcesOfDivergence();
+ void propagate();
+
+private:
+ // A helper function that explores data dependents of V.
+ void exploreDataDependency(Value *V);
+ // A helper function that explores sync dependents of TI.
+ void exploreSyncDependency(Instruction *TI);
+ // Computes the influence region from Start to End. This region includes all
+ // basic blocks on any simple path from Start to End.
+ void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion);
+  // Finds all users of I that are outside the influence region, and adds these
+ // users to Worklist.
+ void findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
+
+ Function &F;
+ TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ std::vector<Value *> Worklist; // Stack for DFS.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
+};
+
+void DivergencePropagator::populateWithSourcesOfDivergence() {
+ Worklist.clear();
+ DV.clear();
+ for (auto &I : instructions(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ Worklist.push_back(&I);
+ DV.insert(&I);
+ }
+ }
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ Worklist.push_back(&Arg);
+ DV.insert(&Arg);
+ }
+ }
+}
+
+void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
+ // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
+ // immediate post dominator are divergent. This rule handles if-then-else
+ // patterns. For example,
+ //
+ // if (tid < 5)
+ // a1 = 1;
+ // else
+ // a2 = 2;
+ // a = phi(a1, a2); // sync dependent on (tid < 5)
+ BasicBlock *ThisBB = TI->getParent();
+
+ // Unreachable blocks may not be in the dominator tree.
+ if (!DT.isReachableFromEntry(ThisBB))
+ return;
+
+ // If the function has no exit blocks or doesn't reach any exit blocks, the
+ // post dominator may be null.
+ DomTreeNode *ThisNode = PDT.getNode(ThisBB);
+ if (!ThisNode)
+ return;
+
+ BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
+ if (IPostDom == nullptr)
+ return;
+
+ for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
+ // A PHINode is uniform if it returns the same value no matter which path is
+ // taken.
+ if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
+ Worklist.push_back(&*I);
+ }
+
+ // Propagation rule 2: if a value defined in a loop is used outside, the user
+ // is sync dependent on the condition of the loop exits that dominate the
+ // user. For example,
+ //
+ // int i = 0;
+ // do {
+ // i++;
+ // if (foo(i)) ... // uniform
+ // } while (i < tid);
+ // if (bar(i)) ... // divergent
+ //
+ // A program may contain unstructured loops. Therefore, we cannot leverage
+ // LoopInfo, which only recognizes natural loops.
+ //
+ // The algorithm used here handles both natural and unstructured loops. Given
+ // a branch TI, we first compute its influence region, the union of all simple
+ // paths from TI to its immediate post dominator (IPostDom). Then, we search
+ // for all the values defined in the influence region but used outside. All
+ // these users are sync dependent on TI.
+ DenseSet<BasicBlock *> InfluenceRegion;
+ computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
+ // An insight that can speed up the search process is that all the in-region
+ // values that are used outside must dominate TI. Therefore, instead of
+  // searching every basic block in the influence region, we search all the
+ // dominators of TI until it is outside the influence region.
+ BasicBlock *InfluencedBB = ThisBB;
+ while (InfluenceRegion.count(InfluencedBB)) {
+ for (auto &I : *InfluencedBB)
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
+ if (IDomNode == nullptr)
+ break;
+ InfluencedBB = IDomNode->getBlock();
+ }
+}
+
+void DivergencePropagator::findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
+ for (User *U : I.users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!InfluenceRegion.count(UserInst->getParent())) {
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+ }
+}
+
+// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
+// to the influence region.
+static void
+addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion,
+ std::vector<BasicBlock *> &InfluenceStack) {
+ for (BasicBlock *Succ : successors(ThisBB)) {
+ if (Succ != End && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+}
+
+void DivergencePropagator::computeInfluenceRegion(
+ BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion) {
+ assert(PDT.properlyDominates(End, Start) &&
+ "End does not properly dominate Start");
+
+ // The influence region starts from the end of "Start" to the beginning of
+ // "End". Therefore, "Start" should not be in the region unless "Start" is in
+ // a loop that doesn't contain "End".
+ std::vector<BasicBlock *> InfluenceStack;
+ addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
+ while (!InfluenceStack.empty()) {
+ BasicBlock *BB = InfluenceStack.back();
+ InfluenceStack.pop_back();
+ addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
+ }
+}
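computeInfluenceRegion is a plain forward DFS from Start's successors that stops at End. A generic standalone sketch of the same traversal (integers stand in for basic blocks; illustrative only, not part of the patch):

#include <cassert>
#include <set>
#include <vector>

// Generic version of computeInfluenceRegion: everything reachable from
// Start's successors without crossing End. Succs is the successor list.
static std::set<int> influenceRegion(const std::vector<std::vector<int>> &Succs,
                                     int Start, int End) {
  std::set<int> Region;
  std::vector<int> Stack;
  auto AddSuccs = [&](int BB) {
    for (int S : Succs[BB])
      if (S != End && Region.insert(S).second)
        Stack.push_back(S);
  };
  AddSuccs(Start);
  while (!Stack.empty()) {
    int BB = Stack.back();
    Stack.pop_back();
    AddSuccs(BB);
  }
  return Region;
}

int main() {
  // Diamond CFG: 0 -> {1,2}, 1 -> {3}, 2 -> {3}; Start = 0, End = 3.
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};
  std::set<int> R = influenceRegion(Succs, 0, 3);
  assert((R == std::set<int>{1, 2}));
  return 0;
}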
+
+void DivergencePropagator::exploreDataDependency(Value *V) {
+ // Follow def-use chains of V.
+ for (User *U : V->users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+}
+
+void DivergencePropagator::propagate() {
+ // Traverse the dependency graph using DFS.
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+      // Terminators with fewer than two successors won't introduce sync
+ // dependency. Ignore them.
+ if (I->isTerminator() && I->getNumSuccessors() > 1)
+ exploreSyncDependency(I);
+ }
+ exploreDataDependency(V);
+ }
+}
+
+} // namespace
+
+// Register this pass.
+char LegacyDivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence",
+ "Legacy Divergence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence",
+ "Legacy Divergence Analysis", false, true)
+
+FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
+ return new LegacyDivergenceAnalysis();
+}
+
+void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ if (UseGPUDA)
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+bool LegacyDivergenceAnalysis::shouldUseGPUDivergenceAnalysis(
+ const Function &F) const {
+ if (!UseGPUDA)
+ return false;
+
+ // GPUDivergenceAnalysis requires a reducible CFG.
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ using RPOTraversal = ReversePostOrderTraversal<const Function *>;
+ RPOTraversal FuncRPOT(&F);
+ return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
+ const LoopInfo>(FuncRPOT, LI);
+}
+
+bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ if (TTIWP == nullptr)
+ return false;
+
+ TargetTransformInfo &TTI = TTIWP->getTTI(F);
+ // Fast path: if the target does not have branch divergence, we do not mark
+ // any branch as divergent.
+ if (!TTI.hasBranchDivergence())
+ return false;
+
+ DivergentValues.clear();
+ gpuDA = nullptr;
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+
+ if (shouldUseGPUDivergenceAnalysis(F)) {
+ // run the new GPU divergence analysis
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ gpuDA = llvm::make_unique<GPUDivergenceAnalysis>(F, DT, PDT, LI, TTI);
+
+ } else {
+ // run LLVM's existing DivergenceAnalysis
+ DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues);
+ DP.populateWithSourcesOfDivergence();
+ DP.propagate();
+ }
+
+ LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
+ << ":\n";
+ print(dbgs(), F.getParent()));
+
+ return false;
+}
+
+bool LegacyDivergenceAnalysis::isDivergent(const Value *V) const {
+ if (gpuDA) {
+ return gpuDA->isDivergent(*V);
+ }
+ return DivergentValues.count(V);
+}
+
+void LegacyDivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+ if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
+ return;
+
+ const Function *F = nullptr;
+ if (!DivergentValues.empty()) {
+ const Value *FirstDivergentValue = *DivergentValues.begin();
+ if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+ F = Arg->getParent();
+ } else if (const Instruction *I =
+ dyn_cast<Instruction>(FirstDivergentValue)) {
+ F = I->getParent()->getParent();
+ } else {
+ llvm_unreachable("Only arguments and instructions can be divergent");
+ }
+ } else if (gpuDA) {
+ F = &gpuDA->getFunction();
+ }
+ if (!F)
+ return;
+
+ // Dumps all divergent values in F, arguments and then instructions.
+ for (auto &Arg : F->args()) {
+ OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
+ OS << Arg << "\n";
+ }
+  // Iterate over blocks and non-debug instructions in a deterministic order.
+ for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
+ auto &BB = *BI;
+ OS << "\n " << BB.getName() << ":\n";
+ for (auto &I : BB.instructionsWithoutDebug()) {
+ OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
+ OS << I << "\n";
+ }
+ }
+ OS << "\n";
+}
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index db919bd233bf..5d0a627f8426 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -330,12 +330,12 @@ void Lint::visitCallSite(CallSite CS) {
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
// isn't expressive enough for what we really want to do. Known partial
// overlap is not distinguished from the case where nothing is known.
- uint64_t Size = 0;
+ auto Size = LocationSize::unknown();
if (const ConstantInt *Len =
dyn_cast<ConstantInt>(findValue(MCI->getLength(),
/*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
- Size = Len->getValue().getZExtValue();
+ Size = LocationSize::precise(Len->getValue().getZExtValue());
Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
MustAlias,
"Undefined behavior: memcpy source and destination overlap", &I);
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index d319d4c249d3..8129795bc0c1 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -107,8 +107,8 @@ static bool isDereferenceableAndAlignedPointer(
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, Size,
DL, CtxI, DT, Visited);
- if (auto CS = ImmutableCallSite(V))
- if (auto *RP = getArgumentAliasingToReturnedPointer(CS))
+ if (const auto *Call = dyn_cast<CallBase>(V))
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call))
return isDereferenceableAndAlignedPointer(RP, Align, Size, DL, CtxI, DT,
Visited);
@@ -345,7 +345,7 @@ Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
const DataLayout &DL = ScanBB->getModule()->getDataLayout();
// Try to get the store size for the type.
- uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
+ auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));
Value *StrippedPtr = Ptr->stripPointerCasts();
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index a24d66011b8d..7f3480f512ab 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -342,7 +342,7 @@ void RuntimePointerChecking::groupChecks(
//
// The above case requires that we have an UnknownDependence between
// accesses to the same underlying object. This cannot happen unless
- // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+ // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
// is also false. In this case we will use the fallback path and create
// separate checking groups for all pointers.
@@ -420,7 +420,7 @@ void RuntimePointerChecking::groupChecks(
// We've computed the grouped checks for this partition.
// Save the results and continue with the next one.
- std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
+ llvm::copy(Groups, std::back_inserter(CheckingGroups));
}
}
@@ -509,7 +509,7 @@ public:
/// Register a load and whether it is only read from.
void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
+ AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, false));
if (IsReadOnly)
ReadOnlyPtr.insert(Ptr);
@@ -518,7 +518,7 @@ public:
/// Register a store.
void addStore(MemoryLocation &Loc) {
Value *Ptr = const_cast<Value*>(Loc.Ptr);
- AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
+ AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
Accesses.insert(MemAccessInfo(Ptr, true));
}
@@ -556,7 +556,7 @@ public:
/// perform dependency checking.
///
/// Note that this can later be cleared if we retry memcheck analysis without
- /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
+ /// dependency checking (i.e. FoundNonConstantDistanceDependence).
bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
/// We decided that no dependence analysis would be used. Reset the state.
@@ -604,8 +604,8 @@ private:
///
/// Note that, this is different from isDependencyCheckNeeded. When we retry
/// memcheck analysis without dependency checking
- /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
- /// while this remains set if we have potentially dependent accesses.
+ /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
+ /// cleared while this remains set if we have potentially dependent accesses.
bool IsRTCheckAnalysisNeeded;
/// The SCEV predicate containing all the SCEV-related assumptions.
@@ -1221,18 +1221,20 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return X == PtrSCEVB;
}
-bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+MemoryDepChecker::VectorizationSafetyStatus
+MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
switch (Type) {
case NoDep:
case Forward:
case BackwardVectorizable:
- return true;
+ return VectorizationSafetyStatus::Safe;
case Unknown:
+ return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
case ForwardButPreventsForwarding:
case Backward:
case BackwardVectorizableButPreventsForwarding:
- return false;
+ return VectorizationSafetyStatus::Unsafe;
}
llvm_unreachable("unexpected DepType!");
}
@@ -1317,6 +1319,11 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return false;
}
+void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
+ if (Status < S)
+ Status = S;
+}
+
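mergeInStatus works only because the VectorizationSafetyStatus enumerators are ordered from safest to least safe, making the merge a simple maximum. A minimal C++ sketch under that assumed ordering (the real enumerator list is declared in LoopAccessAnalysis.h, which is not part of this diff):

#include <cassert>

// Assumed enumerator order, safest first; this is what makes
// 'if (Status < S) Status = S;' a correct merge.
enum class SafetyStatus { Safe, PossiblySafeWithRtChecks, Unsafe };

static void mergeInStatus(SafetyStatus &Status, SafetyStatus S) {
  if (Status < S)
    Status = S;
}

int main() {
  SafetyStatus St = SafetyStatus::Safe;
  mergeInStatus(St, SafetyStatus::PossiblySafeWithRtChecks);
  mergeInStatus(St, SafetyStatus::Safe); // merging a safer status is a no-op
  assert(St == SafetyStatus::PossiblySafeWithRtChecks);
  return 0;
}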
/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
@@ -1485,7 +1492,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::NoDep;
LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
- ShouldRetryWithRuntimeCheck = true;
+ FoundNonConstantDistanceDependence = true;
return Dependence::Unknown;
}
@@ -1652,7 +1659,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
Dependence::DepType Type =
isDependent(*A.first, A.second, *B.first, B.second, Strides);
- SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+ mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
// dependences. In that case return as soon as we find the first
@@ -1669,7 +1676,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
<< "Too many dependences, stopped recording\n");
}
}
- if (!RecordDependences && !SafeForVectorization)
+ if (!RecordDependences && !isSafeForVectorization())
return false;
}
++OI;
@@ -1679,7 +1686,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
}
LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
- return SafeForVectorization;
+ return isSafeForVectorization();
}
SmallVector<Instruction *, 4>
@@ -1862,10 +1869,17 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// writes and between reads and writes, but not between reads and reads.
ValueSet Seen;
+ // Record uniform store addresses to identify if we have multiple stores
+ // to the same address.
+ ValueSet UniformStores;
+
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
- // Check for store to loop invariant address.
- StoreToLoopInvariantAddress |= isUniform(Ptr);
+
+ if (isUniform(Ptr))
+ HasDependenceInvolvingLoopInvariantAddress |=
+ !UniformStores.insert(Ptr).second;
+
// If we did *not* see this pointer before, insert it to the read-write
// list. At this phase it is only a 'write' list.
if (Seen.insert(Ptr).second) {
@@ -1907,6 +1921,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
IsReadOnlyPtr = true;
}
+    // See if there is an unsafe dependency between a load from a uniform address and a
+ // store to the same uniform address.
+ if (UniformStores.count(Ptr)) {
+ LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
+ "load and uniform store to the same address!\n");
+ HasDependenceInvolvingLoopInvariantAddress = true;
+ }
+
MemoryLocation Loc = MemoryLocation::get(LD);
// The TBAA metadata could have a control dependency on the predication
// condition, so we cannot rely on it when determining whether or not we
@@ -2265,7 +2287,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
- StoreToLoopInvariantAddress(false) {
+ HasDependenceInvolvingLoopInvariantAddress(false) {
if (canAnalyzeLoop())
analyzeLoop(AA, LI, TLI, DT);
}
@@ -2297,8 +2319,8 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
PtrRtChecking->print(OS, Depth);
OS << "\n";
- OS.indent(Depth) << "Store to invariant address was "
- << (StoreToLoopInvariantAddress ? "" : "not ")
+ OS.indent(Depth) << "Non vectorizable stores to invariant address were "
+ << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
<< "found in loop.\n";
OS.indent(Depth) << "SCEV assumptions:\n";
diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp
index 074023a7e1e2..2a3b29d7fbca 100644
--- a/lib/Analysis/LoopAnalysisManager.cpp
+++ b/lib/Analysis/LoopAnalysisManager.cpp
@@ -147,8 +147,8 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<LoopAnalysis>();
PA.preserve<LoopAnalysisManagerFunctionProxy>();
PA.preserve<ScalarEvolutionAnalysis>();
- // FIXME: Uncomment this when all loop passes preserve MemorySSA
- // PA.preserve<MemorySSAAnalysis>();
+ if (EnableMSSALoopDependency)
+ PA.preserve<MemorySSAAnalysis>();
// FIXME: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
PA.preserve<AAManager>();
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 3f78456b3586..ef2b1257015c 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -213,33 +214,21 @@ bool Loop::isSafeToClone() const {
MDNode *Loop::getLoopID() const {
MDNode *LoopID = nullptr;
- if (BasicBlock *Latch = getLoopLatch()) {
- LoopID = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
- } else {
- assert(!getLoopLatch() &&
- "The loop should have no single latch at this point");
- // Go through each predecessor of the loop header and check the
- // terminator for the metadata.
- BasicBlock *H = getHeader();
- for (BasicBlock *BB : this->blocks()) {
- TerminatorInst *TI = BB->getTerminator();
- MDNode *MD = nullptr;
-
- // Check if this terminator branches to the loop header.
- for (BasicBlock *Successor : TI->successors()) {
- if (Successor == H) {
- MD = TI->getMetadata(LLVMContext::MD_loop);
- break;
- }
- }
- if (!MD)
- return nullptr;
- if (!LoopID)
- LoopID = MD;
- else if (MD != LoopID)
- return nullptr;
- }
+ // Go through the latch blocks and check the terminator for the metadata.
+ SmallVector<BasicBlock *, 4> LatchesBlocks;
+ getLoopLatches(LatchesBlocks);
+ for (BasicBlock *BB : LatchesBlocks) {
+ Instruction *TI = BB->getTerminator();
+ MDNode *MD = TI->getMetadata(LLVMContext::MD_loop);
+
+ if (!MD)
+ return nullptr;
+
+ if (!LoopID)
+ LoopID = MD;
+ else if (MD != LoopID)
+ return nullptr;
}
if (!LoopID || LoopID->getNumOperands() == 0 ||
LoopID->getOperand(0) != LoopID)
@@ -248,23 +237,19 @@ MDNode *Loop::getLoopID() const {
}
void Loop::setLoopID(MDNode *LoopID) const {
- assert(LoopID && "Loop ID should not be null");
- assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
- assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");
+ assert((!LoopID || LoopID->getNumOperands() > 0) &&
+ "Loop ID needs at least one operand");
+ assert((!LoopID || LoopID->getOperand(0) == LoopID) &&
+ "Loop ID should refer to itself");
- if (BasicBlock *Latch = getLoopLatch()) {
- Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
- return;
- }
-
- assert(!getLoopLatch() &&
- "The loop should have no single latch at this point");
BasicBlock *H = getHeader();
for (BasicBlock *BB : this->blocks()) {
- TerminatorInst *TI = BB->getTerminator();
- for (BasicBlock *Successor : TI->successors()) {
- if (Successor == H)
+ Instruction *TI = BB->getTerminator();
+ for (BasicBlock *Successor : successors(TI)) {
+ if (Successor == H) {
TI->setMetadata(LLVMContext::MD_loop, LoopID);
+ break;
+ }
}
}
}
@@ -308,16 +293,50 @@ bool Loop::isAnnotatedParallel() const {
if (!DesiredLoopIdMetadata)
return false;
+ MDNode *ParallelAccesses =
+ findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
+ SmallPtrSet<MDNode *, 4>
+ ParallelAccessGroups; // For scalable 'contains' check.
+ if (ParallelAccesses) {
+ for (const MDOperand &MD : drop_begin(ParallelAccesses->operands(), 1)) {
+ MDNode *AccGroup = cast<MDNode>(MD.get());
+ assert(isValidAsAccessGroup(AccGroup) &&
+ "List item must be an access group");
+ ParallelAccessGroups.insert(AccGroup);
+ }
+ }
+
// The loop branch contains the parallel loop metadata. In order to ensure
// that any parallel-loop-unaware optimization pass hasn't added loop-carried
// dependencies (thus converted the loop back to a sequential loop), check
- // that all the memory instructions in the loop contain parallelism metadata
- // that point to the same unique "loop id metadata" the loop branch does.
+ // that all the memory instructions in the loop belong to an access group that
+ // is parallel to this loop.
for (BasicBlock *BB : this->blocks()) {
for (Instruction &I : *BB) {
if (!I.mayReadOrWriteMemory())
continue;
+ if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
+ auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
+ if (AG->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+ return ParallelAccessGroups.count(AG);
+ }
+
+ for (const MDOperand &AccessListItem : AG->operands()) {
+ MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+ assert(isValidAsAccessGroup(AccGroup) &&
+ "List item must be an access group");
+ if (ParallelAccessGroups.count(AccGroup))
+ return true;
+ }
+ return false;
+ };
+
+ if (ContainsAccessGroup(AccessGroup))
+ continue;
+ }
+
// The memory instruction can refer to the loop identifier metadata
// directly or indirectly through another list metadata (in case of
// nested parallel loops). The loop identifier metadata refers to
@@ -708,6 +727,40 @@ void llvm::printLoop(Loop &L, raw_ostream &OS, const std::string &Banner) {
}
}
+MDNode *llvm::findOptionMDForLoopID(MDNode *LoopID, StringRef Name) {
+ // No loop metadata node, no loop properties.
+ if (!LoopID)
+ return nullptr;
+
+ // First operand should refer to the metadata node itself, for legacy reasons.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ // Iterate over the metadata node operands and look for MDString metadata.
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD || MD->getNumOperands() < 1)
+ continue;
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S)
+ continue;
+ // Return the operand node if MDString holds expected metadata.
+ if (Name.equals(S->getString()))
+ return MD;
+ }
+
+ // Loop property not found.
+ return nullptr;
+}
+
+MDNode *llvm::findOptionMDForLoop(const Loop *TheLoop, StringRef Name) {
+ return findOptionMDForLoopID(TheLoop->getLoopID(), Name);
+}
+
+bool llvm::isValidAsAccessGroup(MDNode *Node) {
+ return Node->getNumOperands() == 0 && Node->isDistinct();
+}
+
//===----------------------------------------------------------------------===//
// LoopInfo implementation
//
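
A short sketch of how the helpers added above compose (findOptionMDForLoop,
isValidAsAccessGroup, and the llvm.loop.parallel_accesses option); the query
function's name is illustrative:

    #include <cassert>
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Metadata.h"

    // Hypothetical query: is AccGroup listed in L's llvm.loop.parallel_accesses
    // property? Operand 0 of the option node is its name string; the remaining
    // operands are access-group nodes.
    static bool isAccessGroupParallelFor(const llvm::Loop *L,
                                         llvm::MDNode *AccGroup) {
      using namespace llvm;
      assert(isValidAsAccessGroup(AccGroup) && "expected a distinct empty node");
      MDNode *ParallelAccesses =
          findOptionMDForLoop(L, "llvm.loop.parallel_accesses");
      if (!ParallelAccesses)
        return false;
      for (const MDOperand &Op : drop_begin(ParallelAccesses->operands(), 1))
        if (cast<MDNode>(Op.get()) == AccGroup)
          return true;
      return false;
    }
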
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 07a151ce0fce..a68f114b83a0 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -193,8 +194,14 @@ bool LPPassManager::runOnFunction(Function &F) {
}
// Walk Loops
- unsigned InstrCount = 0;
+ unsigned InstrCount = 0, FunctionSize = 0;
+ StringMap<std::pair<unsigned, unsigned>> FunctionToInstrCount;
bool EmitICRemark = M.shouldEmitInstrCountChangedRemark();
+ // Collect the initial size of the module and the function we're looking at.
+ if (EmitICRemark) {
+ InstrCount = initSizeRemarkInfo(M, FunctionToInstrCount);
+ FunctionSize = F.getInstructionCount();
+ }
while (!LQ.empty()) {
CurrentLoopDeleted = false;
CurrentLoop = LQ.back();
@@ -209,17 +216,28 @@ bool LPPassManager::runOnFunction(Function &F) {
initializeAnalysisImpl(P);
+ bool LocalChanged = false;
{
PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
TimeRegion PassTimer(getPassTimer(P));
- if (EmitICRemark)
- InstrCount = initSizeRemarkInfo(M);
- Changed |= P->runOnLoop(CurrentLoop, *this);
- if (EmitICRemark)
- emitInstrCountChangedRemark(P, M, InstrCount);
+ LocalChanged = P->runOnLoop(CurrentLoop, *this);
+ Changed |= LocalChanged;
+ if (EmitICRemark) {
+ unsigned NewSize = F.getInstructionCount();
+ // Update the size of the function, emit a remark, and update the
+ // size of the module.
+ if (NewSize != FunctionSize) {
+ int64_t Delta = static_cast<int64_t>(NewSize) -
+ static_cast<int64_t>(FunctionSize);
+ emitInstrCountChangedRemark(P, M, Delta, InstrCount,
+ FunctionToInstrCount, &F);
+ InstrCount = static_cast<int64_t>(InstrCount) + Delta;
+ FunctionSize = NewSize;
+ }
+ }
}
- if (Changed)
+ if (LocalChanged)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
CurrentLoopDeleted ? "<deleted loop>"
: CurrentLoop->getName());
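
The remark bookkeeping above boils down to a small signed-delta rule; a
standalone sketch (the helper name is not part of the patch):

    #include <cstdint>

    // Fold a function's post-pass instruction count back into the running
    // module-wide count, using the signed delta that the remark reports.
    static void updateSizeRemarkCounts(unsigned NewSize, unsigned &FunctionSize,
                                       unsigned &ModuleInstrCount) {
      if (NewSize == FunctionSize)
        return; // No size change; no remark is emitted.
      int64_t Delta =
          static_cast<int64_t>(NewSize) - static_cast<int64_t>(FunctionSize);
      ModuleInstrCount = static_cast<int64_t>(ModuleInstrCount) + Delta;
      FunctionSize = NewSize;
    }
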
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 5a6bbd7b2ac6..907b321b231a 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -13,7 +13,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
@@ -106,9 +105,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
if (!Res.isNonLocal()) {
Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
static_cast<BasicBlock *>(nullptr)));
- } else if (auto CS = CallSite(Inst)) {
+ } else if (auto *Call = dyn_cast<CallBase>(Inst)) {
const MemoryDependenceResults::NonLocalDepInfo &NLDI =
- MDA.getNonLocalCallDependency(CS);
+ MDA.getNonLocalCallDependency(Call);
DepSet &InstDeps = Deps[Inst];
for (const NonLocalDepEntry &I : NLDI) {
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index feae53c54ecb..e22182b99e11 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -31,7 +31,6 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -182,8 +181,8 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
}
/// Private helper for finding the local dependencies of a call site.
-MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
- CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
+MemDepResult MemoryDependenceResults::getCallDependencyFrom(
+ CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
BasicBlock *BB) {
unsigned Limit = BlockScanLimit;
@@ -205,21 +204,21 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
ModRefInfo MR = GetLocation(Inst, Loc, TLI);
if (Loc.Ptr) {
// A simple instruction.
- if (isModOrRefSet(AA.getModRefInfo(CS, Loc)))
+ if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
return MemDepResult::getClobber(Inst);
continue;
}
- if (auto InstCS = CallSite(Inst)) {
+ if (auto *CallB = dyn_cast<CallBase>(Inst)) {
// If these two calls do not interfere, look past it.
- if (isNoModRef(AA.getModRefInfo(CS, InstCS))) {
- // If the two calls are the same, return InstCS as a Def, so that
- // CS can be found redundant and eliminated.
+ if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
+ // If the two calls are the same, return Inst as a Def, so that
+ // Call can be found redundant and eliminated.
if (isReadOnlyCall && !isModSet(MR) &&
- CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ Call->isIdenticalToWhenDefined(CallB))
return MemDepResult::getDef(Inst);
- // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // Otherwise if the two calls don't interact (e.g. CallB is readnone)
// keep scanning.
continue;
} else
@@ -750,11 +749,10 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
LocalCache = getPointerDependencyFrom(
MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);
- } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
- CallSite QueryCS(QueryInst);
- bool isReadOnly = AA.onlyReadsMemory(QueryCS);
- LocalCache = getCallSiteDependencyFrom(
- QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);
+ } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
+ bool isReadOnly = AA.onlyReadsMemory(QueryCall);
+ LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
+ ScanPos->getIterator(), QueryParent);
} else
// Non-memory instruction.
LocalCache = MemDepResult::getUnknown();
@@ -780,11 +778,11 @@ static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
#endif
const MemoryDependenceResults::NonLocalDepInfo &
-MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
- assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
+ assert(getDependency(QueryCall).isNonLocal() &&
"getNonLocalCallDependency should only be used on calls with "
"non-local deps!");
- PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+ PerInstNLInfo &CacheP = NonLocalDeps[QueryCall];
NonLocalDepInfo &Cache = CacheP.first;
// This is the set of blocks that need to be recomputed. In the cached case,
@@ -807,21 +805,21 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.push_back(Entry.getBB());
// Sort the cache so that we can do fast binary search lookups below.
- llvm::sort(Cache.begin(), Cache.end());
+ llvm::sort(Cache);
++NumCacheDirtyNonLocal;
// cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
// << Cache.size() << " cached: " << *QueryInst;
} else {
// Seed DirtyBlocks with each of the preds of QueryInst's block.
- BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+ BasicBlock *QueryBB = QueryCall->getParent();
for (BasicBlock *Pred : PredCache.get(QueryBB))
DirtyBlocks.push_back(Pred);
++NumUncacheNonLocal;
}
// isReadonlyCall - If this is a read-only call, we can be more aggressive.
- bool isReadonlyCall = AA.onlyReadsMemory(QueryCS);
+ bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
SmallPtrSet<BasicBlock *, 32> Visited;
@@ -865,8 +863,8 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
if (Instruction *Inst = ExistingResult->getResult().getInst()) {
ScanPos = Inst->getIterator();
// We're removing QueryInst's use of Inst.
- RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
- QueryCS.getInstruction());
+ RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
+ QueryCall);
}
}
@@ -874,8 +872,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
MemDepResult Dep;
if (ScanPos != DirtyBB->begin()) {
- Dep =
- getCallSiteDependencyFrom(QueryCS, isReadonlyCall, ScanPos, DirtyBB);
+ Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
// No dependence found. If this is the entry block of the function, it is
// a clobber, otherwise it is unknown.
@@ -897,7 +894,7 @@ MemoryDependenceResults::getNonLocalCallDependency(CallSite QueryCS) {
// Keep the ReverseNonLocalDeps map up to date so we can efficiently
// update this when we remove instructions.
if (Instruction *Inst = Dep.getInst())
- ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+ ReverseNonLocalDeps[Inst].insert(QueryCall);
} else {
// If the block *is* completely transparent to the load, we need to check
@@ -1070,7 +1067,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
break;
default:
// Added many values, do a full scale sort.
- llvm::sort(Cache.begin(), Cache.end());
+ llvm::sort(Cache);
break;
}
}
@@ -1113,21 +1110,36 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// If we already have a cache entry for this CacheKey, we may need to do some
// work to reconcile the cache entry and the current query.
if (!Pair.second) {
- if (CacheInfo->Size < Loc.Size) {
- // The query's Size is greater than the cached one. Throw out the
- // cached data and proceed with the query at the greater size.
- CacheInfo->Pair = BBSkipFirstBlockPair();
- CacheInfo->Size = Loc.Size;
- for (auto &Entry : CacheInfo->NonLocalDeps)
- if (Instruction *Inst = Entry.getResult().getInst())
- RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
- CacheInfo->NonLocalDeps.clear();
- } else if (CacheInfo->Size > Loc.Size) {
- // This query's Size is less than the cached one. Conservatively restart
- // the query using the greater size.
- return getNonLocalPointerDepFromBB(
- QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
- StartBB, Result, Visited, SkipFirstBlock);
+ if (CacheInfo->Size != Loc.Size) {
+ bool ThrowOutEverything;
+ if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
+ // FIXME: We may be able to do better in the face of results with mixed
+ // precision. We don't appear to get them in practice, though, so just
+ // be conservative.
+ ThrowOutEverything =
+ CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
+ CacheInfo->Size.getValue() < Loc.Size.getValue();
+ } else {
+ // For our purposes, unknown size > all others.
+ ThrowOutEverything = !Loc.Size.hasValue();
+ }
+
+ if (ThrowOutEverything) {
+ // The query's Size is greater than the cached one. Throw out the
+ // cached data and proceed with the query at the greater size.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ CacheInfo->Size = Loc.Size;
+ for (auto &Entry : CacheInfo->NonLocalDeps)
+ if (Instruction *Inst = Entry.getResult().getInst())
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+ CacheInfo->NonLocalDeps.clear();
+ } else {
+ // This query's Size is less than the cached one. Conservatively restart
+ // the query using the greater size.
+ return getNonLocalPointerDepFromBB(
+ QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
+ StartBB, Result, Visited, SkipFirstBlock);
+ }
}
// If the query's AATags are inconsistent with the cached one,
@@ -1572,7 +1584,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
// RemInst can't be the terminator if it has local stuff depending on it.
- assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
+ assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
"Nothing can locally depend on a terminator");
for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
@@ -1662,7 +1674,7 @@ void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
// Re-sort the NonLocalDepInfo. Changing the dirty entry to its
// subsequent value may invalidate the sortedness.
- llvm::sort(NLPDI.begin(), NLPDI.end());
+ llvm::sort(NLPDI);
}
ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
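
The size-reconciliation rule in getNonLocalPointerDepFromBB, restated as a
standalone predicate over LocationSize (a sketch; the helper name is not in
the patch):

    #include "llvm/Analysis/MemoryLocation.h"

    // True when the cached non-local results for a pointer must be discarded
    // because a new query arrived at an incompatible or larger size; when this
    // returns false, the caller conservatively restarts the query at the
    // cached (larger) size instead.
    static bool mustDiscardCachedDeps(llvm::LocationSize Cached,
                                      llvm::LocationSize New) {
      if (Cached == New)
        return false;
      if (Cached.hasValue() && New.hasValue())
        return Cached.isPrecise() != New.isPrecise() ||
               Cached.getValue() < New.getValue();
      // An unknown size on the new query dominates any known cached size.
      return !New.hasValue();
    }
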
diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp
index 55924db284ec..27e8d72b8e89 100644
--- a/lib/Analysis/MemoryLocation.cpp
+++ b/lib/Analysis/MemoryLocation.cpp
@@ -18,13 +18,28 @@
#include "llvm/IR/Type.h"
using namespace llvm;
+void LocationSize::print(raw_ostream &OS) const {
+ OS << "LocationSize::";
+ if (*this == unknown())
+ OS << "unknown";
+ else if (*this == mapEmpty())
+ OS << "mapEmpty";
+ else if (*this == mapTombstone())
+ OS << "mapTombstone";
+ else if (isPrecise())
+ OS << "precise(" << getValue() << ')';
+ else
+ OS << "upperBound(" << getValue() << ')';
+}
+
MemoryLocation MemoryLocation::get(const LoadInst *LI) {
AAMDNodes AATags;
LI->getAAMetadata(AATags);
const auto &DL = LI->getModule()->getDataLayout();
- return MemoryLocation(LI->getPointerOperand(),
- DL.getTypeStoreSize(LI->getType()), AATags);
+ return MemoryLocation(
+ LI->getPointerOperand(),
+ LocationSize::precise(DL.getTypeStoreSize(LI->getType())), AATags);
}
MemoryLocation MemoryLocation::get(const StoreInst *SI) {
@@ -33,7 +48,8 @@ MemoryLocation MemoryLocation::get(const StoreInst *SI) {
const auto &DL = SI->getModule()->getDataLayout();
return MemoryLocation(SI->getPointerOperand(),
- DL.getTypeStoreSize(SI->getValueOperand()->getType()),
+ LocationSize::precise(DL.getTypeStoreSize(
+ SI->getValueOperand()->getType())),
AATags);
}
@@ -41,7 +57,8 @@ MemoryLocation MemoryLocation::get(const VAArgInst *VI) {
AAMDNodes AATags;
VI->getAAMetadata(AATags);
- return MemoryLocation(VI->getPointerOperand(), UnknownSize, AATags);
+ return MemoryLocation(VI->getPointerOperand(), LocationSize::unknown(),
+ AATags);
}
MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
@@ -49,9 +66,10 @@ MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
CXI->getAAMetadata(AATags);
const auto &DL = CXI->getModule()->getDataLayout();
- return MemoryLocation(
- CXI->getPointerOperand(),
- DL.getTypeStoreSize(CXI->getCompareOperand()->getType()), AATags);
+ return MemoryLocation(CXI->getPointerOperand(),
+ LocationSize::precise(DL.getTypeStoreSize(
+ CXI->getCompareOperand()->getType())),
+ AATags);
}
MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
@@ -60,7 +78,8 @@ MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
const auto &DL = RMWI->getModule()->getDataLayout();
return MemoryLocation(RMWI->getPointerOperand(),
- DL.getTypeStoreSize(RMWI->getValOperand()->getType()),
+ LocationSize::precise(DL.getTypeStoreSize(
+ RMWI->getValOperand()->getType())),
AATags);
}
@@ -73,9 +92,9 @@ MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
}
MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
- uint64_t Size = UnknownSize;
+ auto Size = LocationSize::unknown();
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
- Size = C->getValue().getZExtValue();
+ Size = LocationSize::precise(C->getValue().getZExtValue());
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
@@ -94,9 +113,9 @@ MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
}
MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
- uint64_t Size = UnknownSize;
+ auto Size = LocationSize::unknown();
if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
- Size = C->getValue().getZExtValue();
+ Size = LocationSize::precise(C->getValue().getZExtValue());
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
@@ -106,15 +125,15 @@ MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
return MemoryLocation(MI->getRawDest(), Size, AATags);
}
-MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
+MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
unsigned ArgIdx,
- const TargetLibraryInfo &TLI) {
+ const TargetLibraryInfo *TLI) {
AAMDNodes AATags;
- CS->getAAMetadata(AATags);
- const Value *Arg = CS.getArgument(ArgIdx);
+ Call->getAAMetadata(AATags);
+ const Value *Arg = Call->getArgOperand(ArgIdx);
// We may be able to produce an exact size for known intrinsics.
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Call)) {
const DataLayout &DL = II->getModule()->getDataLayout();
switch (II->getIntrinsicID()) {
@@ -126,7 +145,8 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memory intrinsic");
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
- return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
+ AATags);
break;
case Intrinsic::lifetime_start:
@@ -134,23 +154,37 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
case Intrinsic::invariant_start:
assert(ArgIdx == 1 && "Invalid argument index");
return MemoryLocation(
- Arg, cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AATags);
+ Arg,
+ LocationSize::precise(
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
+ AATags);
case Intrinsic::invariant_end:
+ // The first argument to an invariant.end is a "descriptor" type (e.g. a
+ // pointer to a empty struct) which is never actually dereferenced.
+ if (ArgIdx == 0)
+ return MemoryLocation(Arg, LocationSize::precise(0), AATags);
assert(ArgIdx == 2 && "Invalid argument index");
return MemoryLocation(
- Arg, cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AATags);
+ Arg,
+ LocationSize::precise(
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()),
+ AATags);
case Intrinsic::arm_neon_vld1:
assert(ArgIdx == 0 && "Invalid argument index");
// LLVM's vld1 and vst1 intrinsics currently only support a single
// vector register.
- return MemoryLocation(Arg, DL.getTypeStoreSize(II->getType()), AATags);
+ return MemoryLocation(
+ Arg, LocationSize::precise(DL.getTypeStoreSize(II->getType())),
+ AATags);
case Intrinsic::arm_neon_vst1:
assert(ArgIdx == 0 && "Invalid argument index");
- return MemoryLocation(
- Arg, DL.getTypeStoreSize(II->getArgOperand(1)->getType()), AATags);
+ return MemoryLocation(Arg,
+ LocationSize::precise(DL.getTypeStoreSize(
+ II->getArgOperand(1)->getType())),
+ AATags);
}
}
@@ -159,16 +193,20 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
LibFunc F;
- if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
- F == LibFunc_memset_pattern16 && TLI.has(F)) {
+ if (TLI && Call->getCalledFunction() &&
+ TLI->getLibFunc(*Call->getCalledFunction(), F) &&
+ F == LibFunc_memset_pattern16 && TLI->has(F)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
if (ArgIdx == 1)
- return MemoryLocation(Arg, 16, AATags);
- if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
- return MemoryLocation(Arg, LenCI->getZExtValue(), AATags);
+ return MemoryLocation(Arg, LocationSize::precise(16), AATags);
+ if (const ConstantInt *LenCI =
+ dyn_cast<ConstantInt>(Call->getArgOperand(2)))
+ return MemoryLocation(Arg, LocationSize::precise(LenCI->getZExtValue()),
+ AATags);
}
// FIXME: Handle memset_pattern4 and memset_pattern8 also.
- return MemoryLocation(CS.getArgument(ArgIdx), UnknownSize, AATags);
+ return MemoryLocation(Call->getArgOperand(ArgIdx), LocationSize::unknown(),
+ AATags);
}
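
A caller-side sketch of the explicit LocationSize API used throughout this
file; it mirrors MemoryLocation::get for stores, with an illustrative helper
name:

    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Module.h"

    // A store of a known type gets a precise size; anything whose length is
    // not a compile-time constant would fall back to LocationSize::unknown().
    static llvm::MemoryLocation locForStore(const llvm::StoreInst *SI) {
      const llvm::DataLayout &DL = SI->getModule()->getDataLayout();
      return llvm::MemoryLocation(
          SI->getPointerOperand(),
          llvm::LocationSize::precise(
              DL.getTypeStoreSize(SI->getValueOperand()->getType())));
    }
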
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index f57d490ce96e..6a5567ed765b 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -30,7 +30,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
@@ -77,9 +76,15 @@ static cl::opt<unsigned> MaxCheckLimit(
cl::desc("The maximum number of stores/phis MemorySSA"
"will consider trying to walk past (default = 100)"));
-static cl::opt<bool>
- VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
- cl::desc("Verify MemorySSA in legacy printer pass."));
+// Always verify MemorySSA if expensive checking is enabled.
+#ifdef EXPENSIVE_CHECKS
+bool llvm::VerifyMemorySSA = true;
+#else
+bool llvm::VerifyMemorySSA = false;
+#endif
+static cl::opt<bool, true>
+ VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
+ cl::Hidden, cl::desc("Enable verification of MemorySSA."));
namespace llvm {
@@ -119,16 +124,15 @@ class MemoryLocOrCall {
public:
bool IsCall = false;
- MemoryLocOrCall() = default;
MemoryLocOrCall(MemoryUseOrDef *MUD)
: MemoryLocOrCall(MUD->getMemoryInst()) {}
MemoryLocOrCall(const MemoryUseOrDef *MUD)
: MemoryLocOrCall(MUD->getMemoryInst()) {}
MemoryLocOrCall(Instruction *Inst) {
- if (ImmutableCallSite(Inst)) {
+ if (auto *C = dyn_cast<CallBase>(Inst)) {
IsCall = true;
- CS = ImmutableCallSite(Inst);
+ Call = C;
} else {
IsCall = false;
// There is no such thing as a memorylocation for a fence inst, and it is
@@ -140,9 +144,9 @@ public:
explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {}
- ImmutableCallSite getCS() const {
+ const CallBase *getCall() const {
assert(IsCall);
- return CS;
+ return Call;
}
MemoryLocation getLoc() const {
@@ -157,16 +161,17 @@ public:
if (!IsCall)
return Loc == Other.Loc;
- if (CS.getCalledValue() != Other.CS.getCalledValue())
+ if (Call->getCalledValue() != Other.Call->getCalledValue())
return false;
- return CS.arg_size() == Other.CS.arg_size() &&
- std::equal(CS.arg_begin(), CS.arg_end(), Other.CS.arg_begin());
+ return Call->arg_size() == Other.Call->arg_size() &&
+ std::equal(Call->arg_begin(), Call->arg_end(),
+ Other.Call->arg_begin());
}
private:
union {
- ImmutableCallSite CS;
+ const CallBase *Call;
MemoryLocation Loc;
};
};
@@ -192,9 +197,9 @@ template <> struct DenseMapInfo<MemoryLocOrCall> {
hash_code hash =
hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue(
- MLOC.getCS().getCalledValue()));
+ MLOC.getCall()->getCalledValue()));
- for (const Value *Arg : MLOC.getCS().args())
+ for (const Value *Arg : MLOC.getCall()->args())
hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg));
return hash;
}
@@ -247,24 +252,29 @@ struct ClobberAlias {
// Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being
// ignored if IsClobber = false.
-static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
+static ClobberAlias instructionClobbersQuery(const MemoryDef *MD,
const MemoryLocation &UseLoc,
const Instruction *UseInst,
AliasAnalysis &AA) {
Instruction *DefInst = MD->getMemoryInst();
assert(DefInst && "Defining instruction not actually an instruction");
- ImmutableCallSite UseCS(UseInst);
+ const auto *UseCall = dyn_cast<CallBase>(UseInst);
Optional<AliasResult> AR;
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
// These intrinsics will show up as affecting memory, but they are just
- // markers.
+ // markers, mostly.
+ //
+ // FIXME: We probably don't actually want MemorySSA to model these at all
+ // (including creating MemoryAccesses for them): we just end up inventing
+ // clobbers where they don't really exist at all. Please see D43269 for
+ // context.
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
- if (UseCS)
+ if (UseCall)
return {false, NoAlias};
AR = AA.alias(MemoryLocation(II->getArgOperand(1)), UseLoc);
- return {AR == MustAlias, AR};
+ return {AR != NoAlias, AR};
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
@@ -275,8 +285,8 @@ static ClobberAlias instructionClobbersQuery(MemoryDef *MD,
}
}
- if (UseCS) {
- ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
+ if (UseCall) {
+ ModRefInfo I = AA.getModRefInfo(DefInst, UseCall);
AR = isMustSet(I) ? MustAlias : MayAlias;
return {isModOrRefSet(I), AR};
}
@@ -322,11 +332,12 @@ struct UpwardsMemoryQuery {
// The MemoryAccess we actually got called with, used to test local domination
const MemoryAccess *OriginalAccess = nullptr;
Optional<AliasResult> AR = MayAlias;
+ bool SkipSelfAccess = false;
UpwardsMemoryQuery() = default;
UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
- : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
+ : IsCall(isa<CallBase>(Inst)), Inst(Inst), OriginalAccess(Access) {
if (!IsCall)
StartingLoc = MemoryLocation::get(Inst);
}
@@ -366,13 +377,15 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
/// \param Start The MemoryAccess that we want to walk from.
/// \param ClobberAt A clobber for Start.
/// \param StartLoc The MemoryLocation for Start.
-/// \param MSSA The MemorySSA isntance that Start and ClobberAt belong to.
+/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
/// \param Query The UpwardsMemoryQuery we used for our search.
/// \param AA The AliasAnalysis we used for our search.
-static void LLVM_ATTRIBUTE_UNUSED
-checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
+/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
+static void
+checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
const MemoryLocation &StartLoc, const MemorySSA &MSSA,
- const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
+ const UpwardsMemoryQuery &Query, AliasAnalysis &AA,
+ bool AllowImpreciseClobber = false) {
assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
if (MSSA.isLiveOnEntryDef(Start)) {
@@ -382,21 +395,21 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
}
bool FoundClobber = false;
- DenseSet<MemoryAccessPair> VisitedPhis;
- SmallVector<MemoryAccessPair, 8> Worklist;
+ DenseSet<ConstMemoryAccessPair> VisitedPhis;
+ SmallVector<ConstMemoryAccessPair, 8> Worklist;
Worklist.emplace_back(Start, StartLoc);
// Walk all paths from Start to ClobberAt, while looking for clobbers. If one
// is found, complain.
while (!Worklist.empty()) {
- MemoryAccessPair MAP = Worklist.pop_back_val();
+ auto MAP = Worklist.pop_back_val();
// All we care about is that nothing from Start to ClobberAt clobbers Start.
// We learn nothing from revisiting nodes.
if (!VisitedPhis.insert(MAP).second)
continue;
- for (MemoryAccess *MA : def_chain(MAP.first)) {
+ for (const auto *MA : def_chain(MAP.first)) {
if (MA == ClobberAt) {
- if (auto *MD = dyn_cast<MemoryDef>(MA)) {
+ if (const auto *MD = dyn_cast<MemoryDef>(MA)) {
// instructionClobbersQuery isn't essentially free, so don't use `|=`,
// since it won't let us short-circuit.
//
@@ -418,19 +431,39 @@ checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
// We should never hit liveOnEntry, unless it's the clobber.
assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");
- if (auto *MD = dyn_cast<MemoryDef>(MA)) {
- (void)MD;
+ if (const auto *MD = dyn_cast<MemoryDef>(MA)) {
+ // If Start is a Def, skip self.
+ if (MD == Start)
+ continue;
+
assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA)
.IsClobber &&
"Found clobber before reaching ClobberAt!");
continue;
}
+ if (const auto *MU = dyn_cast<MemoryUse>(MA)) {
+ (void)MU;
+ assert(MU == Start &&
+ "Can only find use in def chain if Start is a use");
+ continue;
+ }
+
assert(isa<MemoryPhi>(MA));
- Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end());
+ Worklist.append(
+ upward_defs_begin({const_cast<MemoryAccess *>(MA), MAP.second}),
+ upward_defs_end());
}
}
+ // If the verify is done following an optimization, it's possible that
+ // ClobberAt was a conservative clobbering, that we can now infer is not a
+ // true clobbering access. Don't fail the verify if that's the case.
+ // We do have accesses that claim they're optimized, but could be optimized
+ // further. Updating all these can be expensive, so allow it for now (FIXME).
+ if (AllowImpreciseClobber)
+ return;
+
// If ClobberAt is a MemoryPhi, we can assume something above it acted as a
// clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
@@ -503,13 +536,13 @@ class ClobberWalker {
///
/// This does not test for whether StopAt is a clobber
UpwardsWalkResult
- walkToPhiOrClobber(DefPath &Desc,
- const MemoryAccess *StopAt = nullptr) const {
+ walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr,
+ const MemoryAccess *SkipStopAt = nullptr) const {
assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
for (MemoryAccess *Current : def_chain(Desc.Last)) {
Desc.Last = Current;
- if (Current == StopAt)
+ if (Current == StopAt || Current == SkipStopAt)
return {Current, false, MayAlias};
if (auto *MD = dyn_cast<MemoryDef>(Current)) {
@@ -587,9 +620,16 @@ class ClobberWalker {
if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
continue;
- UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere);
+ const MemoryAccess *SkipStopWhere = nullptr;
+ if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) {
+ assert(isa<MemoryDef>(Query->OriginalAccess));
+ SkipStopWhere = Query->OriginalAccess;
+ }
+
+ UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere,
+ /*SkipStopAt=*/SkipStopWhere);
if (Res.IsKnownClobber) {
- assert(Res.Result != StopWhere);
+ assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);
// If this wasn't a cache hit, we hit a clobber when walking. That's a
// failure.
TerminatedPath Term{Res.Result, PathIndex};
@@ -601,10 +641,13 @@ class ClobberWalker {
continue;
}
- if (Res.Result == StopWhere) {
+ if (Res.Result == StopWhere || Res.Result == SkipStopWhere) {
// We've hit our target. Save this path off for if we want to continue
- // walking.
- NewPaused.push_back(PathIndex);
+ // walking. If we are in the mode of skipping the OriginalAccess, and
+ // we've reached back to the OriginalAccess, do not save path, we've
+ // just looped back to self.
+ if (Res.Result != SkipStopWhere)
+ NewPaused.push_back(PathIndex);
continue;
}
@@ -875,7 +918,8 @@ public:
}
#ifdef EXPENSIVE_CHECKS
- checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
+ if (!Q.SkipSelfAccess)
+ checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
#endif
return Result;
}
@@ -903,28 +947,76 @@ struct RenamePassData {
namespace llvm {
+class MemorySSA::ClobberWalkerBase {
+ ClobberWalker Walker;
+ MemorySSA *MSSA;
+
+public:
+ ClobberWalkerBase(MemorySSA *M, AliasAnalysis *A, DominatorTree *D)
+ : Walker(*M, *A, *D), MSSA(M) {}
+
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *,
+ const MemoryLocation &);
+ // The second argument (bool) defines whether the clobber search should skip the
+ // original queried access. If true, there will be a follow-up query searching
+ // for a clobber access past "self". Note that the Optimized access is not
+ // updated if a new clobber is found by this SkipSelf search. If this
+ // additional query becomes heavily used we may decide to cache the result.
+ // Walker instantiations will decide how to set the SkipSelf bool.
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, bool);
+ void verify(const MemorySSA *MSSA) { Walker.verify(MSSA); }
+};
+
/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
/// longer does caching on its own, but the name has been retained for the
/// moment.
class MemorySSA::CachingWalker final : public MemorySSAWalker {
- ClobberWalker Walker;
-
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
+ ClobberWalkerBase *Walker;
public:
- CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
+ CachingWalker(MemorySSA *M, ClobberWalkerBase *W)
+ : MemorySSAWalker(M), Walker(W) {}
~CachingWalker() override = default;
using MemorySSAWalker::getClobberingMemoryAccess;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
- const MemoryLocation &) override;
- void invalidateInfo(MemoryAccess *) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override;
+
+ void invalidateInfo(MemoryAccess *MA) override {
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ MUD->resetOptimized();
+ }
void verify(const MemorySSA *MSSA) override {
MemorySSAWalker::verify(MSSA);
- Walker.verify(MSSA);
+ Walker->verify(MSSA);
+ }
+};
+
+class MemorySSA::SkipSelfWalker final : public MemorySSAWalker {
+ ClobberWalkerBase *Walker;
+
+public:
+ SkipSelfWalker(MemorySSA *M, ClobberWalkerBase *W)
+ : MemorySSAWalker(M), Walker(W) {}
+ ~SkipSelfWalker() override = default;
+
+ using MemorySSAWalker::getClobberingMemoryAccess;
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) override;
+
+ void invalidateInfo(MemoryAccess *MA) override {
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ MUD->resetOptimized();
+ }
+
+ void verify(const MemorySSA *MSSA) override {
+ MemorySSAWalker::verify(MSSA);
+ Walker->verify(MSSA);
}
};
@@ -1063,7 +1155,7 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
: AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
- NextID(0) {
+ SkipWalker(nullptr), NextID(0) {
buildMemorySSA();
}
@@ -1394,10 +1486,25 @@ MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
if (Walker)
return Walker.get();
- Walker = llvm::make_unique<CachingWalker>(this, AA, DT);
+ if (!WalkerBase)
+ WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+
+ Walker = llvm::make_unique<CachingWalker>(this, WalkerBase.get());
return Walker.get();
}
+MemorySSAWalker *MemorySSA::getSkipSelfWalker() {
+ if (SkipWalker)
+ return SkipWalker.get();
+
+ if (!WalkerBase)
+ WalkerBase = llvm::make_unique<ClobberWalkerBase>(this, AA, DT);
+
+ SkipWalker = llvm::make_unique<SkipSelfWalker>(this, WalkerBase.get());
+ return SkipWalker.get();
+}
+
+
// This is a helper function used by the creation routines. It places NewAccess
// into the access and defs lists for a given basic block, at the given
// insertion point.
@@ -1461,15 +1568,25 @@ void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
BlockNumberingValid.erase(BB);
}
+void MemorySSA::prepareForMoveTo(MemoryAccess *What, BasicBlock *BB) {
+ // Keep it in the lookup tables, remove from the lists
+ removeFromLists(What, false);
+
+ // Note that moving should implicitly invalidate the optimized state of a
+ // MemoryUse (and Phis can't be optimized). However, it doesn't do so for a
+ // MemoryDef.
+ if (auto *MD = dyn_cast<MemoryDef>(What))
+ MD->resetOptimized();
+ What->setBlock(BB);
+}
+
// Move What before Where in the IR. The end result is that What will belong to
// the right lists and have the right Block set, but will not otherwise be
// correct. It will not have the right defining access, and if it is a def,
// things below it will not properly be updated.
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
AccessList::iterator Where) {
- // Keep it in the lookup tables, remove from the lists
- removeFromLists(What, false);
- What->setBlock(BB);
+ prepareForMoveTo(What, BB);
insertIntoListsBefore(What, BB, Where);
}
@@ -1485,8 +1602,7 @@ void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB,
assert(Inserted && "Cannot move a Phi to a block that already has one");
}
- removeFromLists(What, false);
- What->setBlock(BB);
+ prepareForMoveTo(What, BB);
insertIntoListsForBlock(What, BB, Point);
}
@@ -1500,9 +1616,10 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
}
MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
- MemoryAccess *Definition) {
+ MemoryAccess *Definition,
+ const MemoryUseOrDef *Template) {
assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
- MemoryUseOrDef *NewAccess = createNewAccess(I);
+ MemoryUseOrDef *NewAccess = createNewAccess(I, Template);
assert(
NewAccess != nullptr &&
"Tried to create a memory access for a non-memory touching instruction");
@@ -1525,7 +1642,8 @@ static inline bool isOrdered(const Instruction *I) {
}
/// Helper function to create new memory accesses
-MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
+MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
+ const MemoryUseOrDef *Template) {
// The assume intrinsic has a control dependency which we model by claiming
// that it writes arbitrarily. Ignore that fake memory dependency here.
// FIXME: Replace this special casing with a more accurate modelling of
@@ -1534,18 +1652,31 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
if (II->getIntrinsicID() == Intrinsic::assume)
return nullptr;
- // Find out what affect this instruction has on memory.
- ModRefInfo ModRef = AA->getModRefInfo(I, None);
- // The isOrdered check is used to ensure that volatiles end up as defs
- // (atomics end up as ModRef right now anyway). Until we separate the
- // ordering chain from the memory chain, this enables people to see at least
- // some relative ordering to volatiles. Note that getClobberingMemoryAccess
- // will still give an answer that bypasses other volatile loads. TODO:
- // Separate memory aliasing and ordering into two different chains so that we
- // can precisely represent both "what memory will this read/write/is clobbered
- // by" and "what instructions can I move this past".
- bool Def = isModSet(ModRef) || isOrdered(I);
- bool Use = isRefSet(ModRef);
+ bool Def, Use;
+ if (Template) {
+ Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
+ Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
+#if !defined(NDEBUG)
+ ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ bool DefCheck, UseCheck;
+ DefCheck = isModSet(ModRef) || isOrdered(I);
+ UseCheck = isRefSet(ModRef);
+ assert(Def == DefCheck && (Def || Use == UseCheck) && "Invalid template");
+#endif
+ } else {
+ // Find out what effect this instruction has on memory.
+ ModRefInfo ModRef = AA->getModRefInfo(I, None);
+ // The isOrdered check is used to ensure that volatiles end up as defs
+ // (atomics end up as ModRef right now anyway). Until we separate the
+ // ordering chain from the memory chain, this enables people to see at least
+ // some relative ordering to volatiles. Note that getClobberingMemoryAccess
+ // will still give an answer that bypasses other volatile loads. TODO:
+ // Separate memory aliasing and ordering into two different chains so that
+ // we can precisely represent both "what memory will this read/write/is
+ // clobbered by" and "what instructions can I move this past".
+ Def = isModSet(ModRef) || isOrdered(I);
+ Use = isRefSet(ModRef);
+ }
// It's possible for an instruction to not modify memory at all. During
// construction, we ignore them.
@@ -1648,6 +1779,34 @@ void MemorySSA::verifyMemorySSA() const {
verifyOrdering(F);
verifyDominationNumbers(F);
Walker->verify(this);
+ verifyClobberSanity(F);
+}
+
+/// Check sanity of the clobbering instruction for access MA.
+void MemorySSA::checkClobberSanityAccess(const MemoryAccess *MA) const {
+ if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
+ if (!MUD->isOptimized())
+ return;
+ auto *I = MUD->getMemoryInst();
+ auto Loc = MemoryLocation::getOrNone(I);
+ if (Loc == None)
+ return;
+ auto *Clobber = MUD->getOptimized();
+ UpwardsMemoryQuery Q(I, MUD);
+ checkClobberSanity(MUD, Clobber, *Loc, *this, Q, *AA, true);
+ }
+}
+
+void MemorySSA::verifyClobberSanity(const Function &F) const {
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
+ for (const BasicBlock &BB : F) {
+ const AccessList *Accesses = getBlockAccesses(&BB);
+ if (!Accesses)
+ continue;
+ for (const MemoryAccess &MA : *Accesses)
+ checkClobberSanityAccess(&MA);
+ }
+#endif
}
/// Verify that all of the blocks we believe to have valid domination numbers
@@ -1691,6 +1850,7 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const {
/// Verify that the order and existence of MemoryAccesses matches the
/// order and existence of memory affecting instructions.
void MemorySSA::verifyOrdering(Function &F) const {
+#ifndef NDEBUG
// Walk all the blocks, comparing what the lookups think and what the access
// lists think, as well as the order in the blocks vs the order in the access
// lists.
@@ -1749,6 +1909,7 @@ void MemorySSA::verifyOrdering(Function &F) const {
}
ActualDefs.clear();
}
+#endif
}
/// Verify the domination properties of MemorySSA by checking that each
@@ -1791,6 +1952,7 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
/// accesses and verifying that, for each use, it appears in the
/// appropriate def's use list
void MemorySSA::verifyDefUses(Function &F) const {
+#ifndef NDEBUG
for (BasicBlock &B : F) {
// Phi nodes are attached to basic blocks
if (MemoryPhi *Phi = getMemoryAccess(&B)) {
@@ -1811,14 +1973,7 @@ void MemorySSA::verifyDefUses(Function &F) const {
}
}
}
-}
-
-MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
- return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
-}
-
-MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
- return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
+#endif
}
/// Perform a local numbering on blocks so that instruction ordering can be
@@ -2051,25 +2206,11 @@ void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
-MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
- DominatorTree *D)
- : MemorySSAWalker(M), Walker(*M, *A, *D) {}
-
-void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
- MUD->resetOptimized();
-}
-
-/// Walk the use-def chains starting at \p MA and find
+/// Walk the use-def chains starting at \p StartingAccess and find
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
- MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
- return Walker.findClobber(StartingAccess, Q);
-}
-
-MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
+MemoryAccess *MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(
MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
if (isa<MemoryPhi>(StartingAccess))
return StartingAccess;
@@ -2082,7 +2223,7 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
// Conservatively, fences are always clobbers, so don't perform the walk if we
// hit a fence.
- if (!ImmutableCallSite(I) && I->isFenceLike())
+ if (!isa<CallBase>(I) && I->isFenceLike())
return StartingUseOrDef;
UpwardsMemoryQuery Q;
@@ -2093,11 +2234,12 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
// Unlike the other function, do not walk to the def of a def, because we are
// handed something we already believe is the clobbering access.
+ // We never set SkipSelf to true in Q in this method.
MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
? StartingUseOrDef->getDefiningAccess()
: StartingUseOrDef;
- MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
+ MemoryAccess *Clobber = Walker.findClobber(DefiningAccess, Q);
LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
LLVM_DEBUG(dbgs() << *StartingUseOrDef << "\n");
LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
@@ -2106,26 +2248,33 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
}
MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+MemorySSA::ClobberWalkerBase::getClobberingMemoryAccessBase(MemoryAccess *MA,
+ bool SkipSelf) {
auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
// If this is a MemoryPhi, we can't do anything.
if (!StartingAccess)
return MA;
+ bool IsOptimized = false;
+
// If this is an already optimized use or def, return the optimized result.
// Note: Currently, we store the optimized def result in a separate field,
// since we can't use the defining access.
- if (StartingAccess->isOptimized())
- return StartingAccess->getOptimized();
+ if (StartingAccess->isOptimized()) {
+ if (!SkipSelf || !isa<MemoryDef>(StartingAccess))
+ return StartingAccess->getOptimized();
+ IsOptimized = true;
+ }
const Instruction *I = StartingAccess->getMemoryInst();
- UpwardsMemoryQuery Q(I, StartingAccess);
// We can't sanely do anything with a fence, since they conservatively clobber
// all memory, and have no locations to get pointers from to try to
// disambiguate.
- if (!Q.IsCall && I->isFenceLike())
+ if (!isa<CallBase>(I) && I->isFenceLike())
return StartingAccess;
+ UpwardsMemoryQuery Q(I, StartingAccess);
+
if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
StartingAccess->setOptimized(LiveOnEntry);
@@ -2133,33 +2282,71 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
return LiveOnEntry;
}
- // Start with the thing we already think clobbers this location
- MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
+ MemoryAccess *OptimizedAccess;
+ if (!IsOptimized) {
+ // Start with the thing we already think clobbers this location
+ MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
+
+ // At this point, DefiningAccess may be the live on entry def.
+ // If it is, we will not get a better result.
+ if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
+ StartingAccess->setOptimized(DefiningAccess);
+ StartingAccess->setOptimizedAccessType(None);
+ return DefiningAccess;
+ }
- // At this point, DefiningAccess may be the live on entry def.
- // If it is, we will not get a better result.
- if (MSSA->isLiveOnEntryDef(DefiningAccess)) {
- StartingAccess->setOptimized(DefiningAccess);
- StartingAccess->setOptimizedAccessType(None);
- return DefiningAccess;
- }
+ OptimizedAccess = Walker.findClobber(DefiningAccess, Q);
+ StartingAccess->setOptimized(OptimizedAccess);
+ if (MSSA->isLiveOnEntryDef(OptimizedAccess))
+ StartingAccess->setOptimizedAccessType(None);
+ else if (Q.AR == MustAlias)
+ StartingAccess->setOptimizedAccessType(MustAlias);
+ } else
+ OptimizedAccess = StartingAccess->getOptimized();
- MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
- LLVM_DEBUG(dbgs() << *DefiningAccess << "\n");
- LLVM_DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
- LLVM_DEBUG(dbgs() << *Result << "\n");
-
- StartingAccess->setOptimized(Result);
- if (MSSA->isLiveOnEntryDef(Result))
- StartingAccess->setOptimizedAccessType(None);
- else if (Q.AR == MustAlias)
- StartingAccess->setOptimizedAccessType(MustAlias);
+ LLVM_DEBUG(dbgs() << *StartingAccess << "\n");
+ LLVM_DEBUG(dbgs() << "Optimized Memory SSA clobber for " << *I << " is ");
+ LLVM_DEBUG(dbgs() << *OptimizedAccess << "\n");
+
+ MemoryAccess *Result;
+ if (SkipSelf && isa<MemoryPhi>(OptimizedAccess) &&
+ isa<MemoryDef>(StartingAccess)) {
+ assert(isa<MemoryDef>(Q.OriginalAccess));
+ Q.SkipSelfAccess = true;
+ Result = Walker.findClobber(OptimizedAccess, Q);
+ } else
+ Result = OptimizedAccess;
+
+ LLVM_DEBUG(dbgs() << "Result Memory SSA clobber [SkipSelf = " << SkipSelf);
+ LLVM_DEBUG(dbgs() << "] for " << *I << " is " << *Result << "\n");
return Result;
}
MemoryAccess *
+MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+ return Walker->getClobberingMemoryAccessBase(MA, false);
+}
+
+MemoryAccess *
+MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc);
+}
+
+MemoryAccess *
+MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+ return Walker->getClobberingMemoryAccessBase(MA, true);
+}
+
+MemoryAccess *
+MemorySSA::SkipSelfWalker::getClobberingMemoryAccess(MemoryAccess *MA,
+ const MemoryLocation &Loc) {
+ return Walker->getClobberingMemoryAccessBase(MA, Loc);
+}
+
+MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
return Use->getDefiningAccess();
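
A usage sketch for the new skip-self walker, using only the entry points
introduced above (the wrapper name is illustrative):

    #include "llvm/Analysis/MemorySSA.h"

    // When the optimized clobber of a MemoryDef is a MemoryPhi, the skip-self
    // walker performs a second walk that ignores the original def, so the
    // result is a clobbering access strictly past "self".
    static llvm::MemoryAccess *clobberPastSelf(llvm::MemorySSA &MSSA,
                                               llvm::MemoryDef *MD) {
      llvm::MemorySSAWalker *Walker = MSSA.getSkipSelfWalker();
      return Walker->getClobberingMemoryAccess(MD);
    }
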
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
index abe2b3c25a58..6c817d203684 100644
--- a/lib/Analysis/MemorySSAUpdater.cpp
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------===//
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -91,7 +93,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
// FIXME: Figure out whether this is dead code and if so remove it.
if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
// These will have been filled in by the recursive read we did above.
- std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin());
+ llvm::copy(PhiOps, Phi->op_begin());
std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
}
} else {
@@ -264,16 +266,15 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end();
UI != UE;) {
Use &U = *UI++;
- // Leave the uses alone
- if (isa<MemoryUse>(U.getUser()))
+ // Leave the MemoryUses alone.
+ // Also make sure we skip ourselves to avoid self references.
+ if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD)
continue;
U.set(MD);
}
}
// and that def is now our defining access.
- // We change them in this order otherwise we will appear in the use list
- // above and reset ourselves.
MD->setDefiningAccess(DefBefore);
SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
@@ -392,6 +393,522 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
}
}
+void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+ MPhi->unorderedDeleteIncomingBlock(From);
+ if (MPhi->getNumIncomingValues() == 1)
+ removeMemoryAccess(MPhi);
+ }
+}
+
+void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
+ BasicBlock *To) {
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+ bool Found = false;
+ MPhi->unorderedDeleteIncomingIf([&](const MemoryAccess *, BasicBlock *B) {
+ if (From != B)
+ return false;
+ if (Found)
+ return true;
+ Found = true;
+ return false;
+ });
+ if (MPhi->getNumIncomingValues() == 1)
+ removeMemoryAccess(MPhi);
+ }
+}
+
+void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
+ const ValueToValueMapTy &VMap,
+ PhiToDefMap &MPhiMap) {
+ auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
+ MemoryAccess *InsnDefining = MA;
+ if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
+ if (!MSSA->isLiveOnEntryDef(DefMUD)) {
+ Instruction *DefMUDI = DefMUD->getMemoryInst();
+ assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
+ if (Instruction *NewDefMUDI =
+ cast_or_null<Instruction>(VMap.lookup(DefMUDI)))
+ InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
+ }
+ } else {
+ MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
+ if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi))
+ InsnDefining = NewDefPhi;
+ }
+ assert(InsnDefining && "Defining instruction cannot be nullptr.");
+ return InsnDefining;
+ };
+
+ const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
+ if (!Acc)
+ return;
+ for (const MemoryAccess &MA : *Acc) {
+ if (const MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&MA)) {
+ Instruction *Insn = MUD->getMemoryInst();
+ // Entry does not exist if the clone of the block did not clone all
+ // instructions. This occurs in LoopRotate when cloning instructions
+ // from the old header to the old preheader. The cloned instruction may
+ // also be a simplified Value, not an Instruction (see LoopRotate).
+ if (Instruction *NewInsn =
+ dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
+ MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
+ NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), MUD);
+ MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
+ }
+ }
+ }
+}
+
+void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
+ ArrayRef<BasicBlock *> ExitBlocks,
+ const ValueToValueMapTy &VMap,
+ bool IgnoreIncomingWithNoClones) {
+ PhiToDefMap MPhiMap;
+
+ auto FixPhiIncomingValues = [&](MemoryPhi *Phi, MemoryPhi *NewPhi) {
+ assert(Phi && NewPhi && "Invalid Phi nodes.");
+ BasicBlock *NewPhiBB = NewPhi->getBlock();
+ SmallPtrSet<BasicBlock *, 4> NewPhiBBPreds(pred_begin(NewPhiBB),
+ pred_end(NewPhiBB));
+ for (unsigned It = 0, E = Phi->getNumIncomingValues(); It < E; ++It) {
+ MemoryAccess *IncomingAccess = Phi->getIncomingValue(It);
+ BasicBlock *IncBB = Phi->getIncomingBlock(It);
+
+ if (BasicBlock *NewIncBB = cast_or_null<BasicBlock>(VMap.lookup(IncBB)))
+ IncBB = NewIncBB;
+ else if (IgnoreIncomingWithNoClones)
+ continue;
+
+ // Now we have IncBB, and will need to add incoming from it to NewPhi.
+
+ // If IncBB is not a predecessor of NewPhiBB, then do not add it.
+ // NewPhiBB was cloned without that edge.
+ if (!NewPhiBBPreds.count(IncBB))
+ continue;
+
+ // Determine incoming value and add it as incoming from IncBB.
+ if (MemoryUseOrDef *IncMUD = dyn_cast<MemoryUseOrDef>(IncomingAccess)) {
+ if (!MSSA->isLiveOnEntryDef(IncMUD)) {
+ Instruction *IncI = IncMUD->getMemoryInst();
+ assert(IncI && "Found MemoryUseOrDef with no Instruction.");
+ if (Instruction *NewIncI =
+ cast_or_null<Instruction>(VMap.lookup(IncI))) {
+ IncMUD = MSSA->getMemoryAccess(NewIncI);
+ assert(IncMUD &&
+ "MemoryUseOrDef cannot be null, all preds processed.");
+ }
+ }
+ NewPhi->addIncoming(IncMUD, IncBB);
+ } else {
+ MemoryPhi *IncPhi = cast<MemoryPhi>(IncomingAccess);
+ if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi))
+ NewPhi->addIncoming(NewDefPhi, IncBB);
+ else
+ NewPhi->addIncoming(IncPhi, IncBB);
+ }
+ }
+ };
+
+ auto ProcessBlock = [&](BasicBlock *BB) {
+ BasicBlock *NewBlock = cast_or_null<BasicBlock>(VMap.lookup(BB));
+ if (!NewBlock)
+ return;
+
+ assert(!MSSA->getWritableBlockAccesses(NewBlock) &&
+ "Cloned block should have no accesses");
+
+ // Add MemoryPhi.
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB)) {
+ MemoryPhi *NewPhi = MSSA->createMemoryPhi(NewBlock);
+ MPhiMap[MPhi] = NewPhi;
+ }
+ // Update Uses and Defs.
+ cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap);
+ };
+
+ for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ ProcessBlock(BB);
+
+ for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+ if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi))
+ FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi));
+}
+
+void MemorySSAUpdater::updateForClonedBlockIntoPred(
+ BasicBlock *BB, BasicBlock *P1, const ValueToValueMapTy &VM) {
+  // All defs/phis from outside BB that are used in BB are valid uses in P1,
+  // since those defs/phis must have dominated BB and thus also dominate P1.
+ // Defs from BB being used in BB will be replaced with the cloned defs from
+ // VM. The uses of BB's Phi (if it exists) in BB will be replaced by the
+ // incoming def into the Phi from P1.
+ PhiToDefMap MPhiMap;
+ if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+ MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
+ cloneUsesAndDefs(BB, P1, VM, MPhiMap);
+}
+
+template <typename Iter>
+void MemorySSAUpdater::privateUpdateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks, Iter ValuesBegin, Iter ValuesEnd,
+ DominatorTree &DT) {
+ SmallVector<CFGUpdate, 4> Updates;
+ // Update/insert phis in all successors of exit blocks.
+ for (auto *Exit : ExitBlocks)
+ for (const ValueToValueMapTy *VMap : make_range(ValuesBegin, ValuesEnd))
+ if (BasicBlock *NewExit = cast_or_null<BasicBlock>(VMap->lookup(Exit))) {
+ BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+ Updates.push_back({DT.Insert, NewExit, ExitSucc});
+ }
+ applyInsertUpdates(Updates, DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks, const ValueToValueMapTy &VMap,
+ DominatorTree &DT) {
+ const ValueToValueMapTy *const Arr[] = {&VMap};
+ privateUpdateExitBlocksForClonedLoop(ExitBlocks, std::begin(Arr),
+ std::end(Arr), DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+ ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT) {
+ auto GetPtr = [&](const std::unique_ptr<ValueToValueMapTy> &I) {
+ return I.get();
+ };
+ using MappedIteratorType =
+ mapped_iterator<const std::unique_ptr<ValueToValueMapTy> *,
+ decltype(GetPtr)>;
+ auto MapBegin = MappedIteratorType(VMaps.begin(), GetPtr);
+ auto MapEnd = MappedIteratorType(VMaps.end(), GetPtr);
+ privateUpdateExitBlocksForClonedLoop(ExitBlocks, MapBegin, MapEnd, DT);
+}
+
+void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
+ DominatorTree &DT) {
+ SmallVector<CFGUpdate, 4> RevDeleteUpdates;
+ SmallVector<CFGUpdate, 4> InsertUpdates;
+ for (auto &Update : Updates) {
+ if (Update.getKind() == DT.Insert)
+ InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+ else
+ RevDeleteUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+ }
+
+ if (!RevDeleteUpdates.empty()) {
+ // Update for inserted edges: use newDT and snapshot CFG as if deletes had
+    // not occurred.
+    // FIXME: This creates a new DT, so it's more expensive to mix deletes and
+    // inserts than to do only inserts. We could do an incremental update on
+    // the DT to revert the deletes, then re-delete the edges. Teaching DT to
+    // do this is part of a pending cleanup.
+ DominatorTree NewDT(DT, RevDeleteUpdates);
+ GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
+ applyInsertUpdates(InsertUpdates, NewDT, &GD);
+ } else {
+ GraphDiff<BasicBlock *> GD;
+ applyInsertUpdates(InsertUpdates, DT, &GD);
+ }
+
+ // Update for deleted edges
+ for (auto &Update : RevDeleteUpdates)
+ removeEdge(Update.getFrom(), Update.getTo());
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+ DominatorTree &DT) {
+ GraphDiff<BasicBlock *> GD;
+ applyInsertUpdates(Updates, DT, &GD);
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+ DominatorTree &DT,
+ const GraphDiff<BasicBlock *> *GD) {
+ // Get recursive last Def, assuming well formed MSSA and updated DT.
+ auto GetLastDef = [&](BasicBlock *BB) -> MemoryAccess * {
+ while (true) {
+ MemorySSA::DefsList *Defs = MSSA->getWritableBlockDefs(BB);
+ // Return last Def or Phi in BB, if it exists.
+ if (Defs)
+ return &*(--Defs->end());
+
+ // Check number of predecessors, we only care if there's more than one.
+ unsigned Count = 0;
+ BasicBlock *Pred = nullptr;
+ for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+ Pred = Pair.second;
+ Count++;
+ if (Count == 2)
+ break;
+ }
+
+ // If BB has multiple predecessors, get last definition from IDom.
+ if (Count != 1) {
+ // [SimpleLoopUnswitch] If BB is a dead block, about to be deleted, its
+ // DT is invalidated. Return LoE as its last def. This will be added to
+ // MemoryPhi node, and later deleted when the block is deleted.
+ if (!DT.getNode(BB))
+ return MSSA->getLiveOnEntryDef();
+ if (auto *IDom = DT.getNode(BB)->getIDom())
+ if (IDom->getBlock() != BB) {
+ BB = IDom->getBlock();
+ continue;
+ }
+ return MSSA->getLiveOnEntryDef();
+ } else {
+ // Single predecessor, BB cannot be dead. GetLastDef of Pred.
+ assert(Count == 1 && Pred && "Single predecessor expected.");
+ BB = Pred;
+ }
+ };
+ llvm_unreachable("Unable to get last definition.");
+ };
+
+ // Get nearest IDom given a set of blocks.
+ // TODO: this can be optimized by starting the search at the node with the
+ // lowest level (highest in the tree).
+ auto FindNearestCommonDominator =
+ [&](const SmallSetVector<BasicBlock *, 2> &BBSet) -> BasicBlock * {
+ BasicBlock *PrevIDom = *BBSet.begin();
+ for (auto *BB : BBSet)
+ PrevIDom = DT.findNearestCommonDominator(PrevIDom, BB);
+ return PrevIDom;
+ };
+
+ // Get all blocks that dominate PrevIDom, stop when reaching CurrIDom. Do not
+ // include CurrIDom.
+ auto GetNoLongerDomBlocks =
+ [&](BasicBlock *PrevIDom, BasicBlock *CurrIDom,
+ SmallVectorImpl<BasicBlock *> &BlocksPrevDom) {
+ if (PrevIDom == CurrIDom)
+ return;
+ BlocksPrevDom.push_back(PrevIDom);
+ BasicBlock *NextIDom = PrevIDom;
+ while (BasicBlock *UpIDom =
+ DT.getNode(NextIDom)->getIDom()->getBlock()) {
+ if (UpIDom == CurrIDom)
+ break;
+ BlocksPrevDom.push_back(UpIDom);
+ NextIDom = UpIDom;
+ }
+ };
+
+ // Map a BB to its predecessors: added + previously existing. To get a
+ // deterministic order, store predecessors as SetVectors. The order in each
+  // will be defined by the order in Updates (fixed) and the order given by
+ // children<> (also fixed). Since we further iterate over these ordered sets,
+ // we lose the information of multiple edges possibly existing between two
+  // blocks, so we'll keep an EdgeCount map for that.
+  // An alternate implementation could keep an unordered set for the predecessors,
+ // traverse either Updates or children<> each time to get the deterministic
+ // order, and drop the usage of EdgeCount. This alternate approach would still
+  // require querying the maps for each predecessor, and the children<> call has
+ // additional computation inside for creating the snapshot-graph predecessors.
+ // As such, we favor using a little additional storage and less compute time.
+ // This decision can be revisited if we find the alternative more favorable.
+
+ struct PredInfo {
+ SmallSetVector<BasicBlock *, 2> Added;
+ SmallSetVector<BasicBlock *, 2> Prev;
+ };
+ SmallDenseMap<BasicBlock *, PredInfo> PredMap;
+
+ for (auto &Edge : Updates) {
+ BasicBlock *BB = Edge.getTo();
+ auto &AddedBlockSet = PredMap[BB].Added;
+ AddedBlockSet.insert(Edge.getFrom());
+ }
+
+  // Store all existing predecessors for each BB; at least one must exist.
+ SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, int> EdgeCountMap;
+ SmallPtrSet<BasicBlock *, 2> NewBlocks;
+ for (auto &BBPredPair : PredMap) {
+ auto *BB = BBPredPair.first;
+ const auto &AddedBlockSet = BBPredPair.second.Added;
+ auto &PrevBlockSet = BBPredPair.second.Prev;
+ for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+ BasicBlock *Pi = Pair.second;
+ if (!AddedBlockSet.count(Pi))
+ PrevBlockSet.insert(Pi);
+ EdgeCountMap[{Pi, BB}]++;
+ }
+
+ if (PrevBlockSet.empty()) {
+ assert(pred_size(BB) == AddedBlockSet.size() && "Duplicate edges added.");
+ LLVM_DEBUG(
+ dbgs()
+ << "Adding a predecessor to a block with no predecessors. "
+ "This must be an edge added to a new, likely cloned, block. "
+ "Its memory accesses must be already correct, assuming completed "
+ "via the updateExitBlocksForClonedLoop API. "
+ "Assert a single such edge is added so no phi addition or "
+ "additional processing is required.\n");
+ assert(AddedBlockSet.size() == 1 &&
+ "Can only handle adding one predecessor to a new block.");
+      // Need to remove new blocks from PredMap. Remove them below so we don't
+      // invalidate the iterator here.
+ NewBlocks.insert(BB);
+ }
+ }
+ // Nothing to process for new/cloned blocks.
+ for (auto *BB : NewBlocks)
+ PredMap.erase(BB);
+
+ SmallVector<BasicBlock *, 8> BlocksToProcess;
+ SmallVector<BasicBlock *, 16> BlocksWithDefsToReplace;
+
+ // First create MemoryPhis in all blocks that don't have one. Create in the
+ // order found in Updates, not in PredMap, to get deterministic numbering.
+ for (auto &Edge : Updates) {
+ BasicBlock *BB = Edge.getTo();
+ if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB))
+ MSSA->createMemoryPhi(BB);
+ }
+
+ // Now we'll fill in the MemoryPhis with the right incoming values.
+ for (auto &BBPredPair : PredMap) {
+ auto *BB = BBPredPair.first;
+ const auto &PrevBlockSet = BBPredPair.second.Prev;
+ const auto &AddedBlockSet = BBPredPair.second.Added;
+ assert(!PrevBlockSet.empty() &&
+ "At least one previous predecessor must exist.");
+
+ // TODO: if this becomes a bottleneck, we can save on GetLastDef calls by
+ // keeping this map before the loop. We can reuse already populated entries
+ // if an edge is added from the same predecessor to two different blocks,
+    // and this does happen in rotate. Note that when deleting unnecessary phis
+    // below, the map would need to be updated: if a deleted phi is in the map,
+    // its value should be replaced with DefP1.
+ SmallDenseMap<BasicBlock *, MemoryAccess *> LastDefAddedPred;
+ for (auto *AddedPred : AddedBlockSet) {
+ auto *DefPn = GetLastDef(AddedPred);
+ assert(DefPn != nullptr && "Unable to find last definition.");
+ LastDefAddedPred[AddedPred] = DefPn;
+ }
+
+ MemoryPhi *NewPhi = MSSA->getMemoryAccess(BB);
+ // If Phi is not empty, add an incoming edge from each added pred. Must
+ // still compute blocks with defs to replace for this block below.
+ if (NewPhi->getNumOperands()) {
+ for (auto *Pred : AddedBlockSet) {
+ auto *LastDefForPred = LastDefAddedPred[Pred];
+ for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+ NewPhi->addIncoming(LastDefForPred, Pred);
+ }
+ } else {
+ // Pick any existing predecessor and get its definition. All other
+ // existing predecessors should have the same one, since no phi existed.
+ auto *P1 = *PrevBlockSet.begin();
+ MemoryAccess *DefP1 = GetLastDef(P1);
+
+      // Check DefP1 against all Defs in LastDefAddedPred. If all are the same,
+ // nothing to add.
+ bool InsertPhi = false;
+ for (auto LastDefPredPair : LastDefAddedPred)
+ if (DefP1 != LastDefPredPair.second) {
+ InsertPhi = true;
+ break;
+ }
+ if (!InsertPhi) {
+ // Since NewPhi may be used in other newly added Phis, replace all uses
+ // of NewPhi with the definition coming from all predecessors (DefP1),
+ // before deleting it.
+ NewPhi->replaceAllUsesWith(DefP1);
+ removeMemoryAccess(NewPhi);
+ continue;
+ }
+
+ // Update Phi with new values for new predecessors and old value for all
+ // other predecessors. Since AddedBlockSet and PrevBlockSet are ordered
+ // sets, the order of entries in NewPhi is deterministic.
+ for (auto *Pred : AddedBlockSet) {
+ auto *LastDefForPred = LastDefAddedPred[Pred];
+ for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+ NewPhi->addIncoming(LastDefForPred, Pred);
+ }
+ for (auto *Pred : PrevBlockSet)
+ for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+ NewPhi->addIncoming(DefP1, Pred);
+
+      // Insert BB in the set of blocks that now have a definition. We'll use
+      // this to compute the IDF and add Phis there next.
+ BlocksToProcess.push_back(BB);
+ }
+
+ // Get all blocks that used to dominate BB and no longer do after adding
+ // AddedBlockSet, where PrevBlockSet are the previously known predecessors.
+ assert(DT.getNode(BB)->getIDom() && "BB does not have valid idom");
+ BasicBlock *PrevIDom = FindNearestCommonDominator(PrevBlockSet);
+    assert(PrevIDom && "Previous IDom should exist");
+ BasicBlock *NewIDom = DT.getNode(BB)->getIDom()->getBlock();
+ assert(NewIDom && "BB should have a new valid idom");
+ assert(DT.dominates(NewIDom, PrevIDom) &&
+ "New idom should dominate old idom");
+ GetNoLongerDomBlocks(PrevIDom, NewIDom, BlocksWithDefsToReplace);
+ }
+
+ // Compute IDF and add Phis in all IDF blocks that do not have one.
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ if (!BlocksToProcess.empty()) {
+ ForwardIDFCalculator IDFs(DT);
+ SmallPtrSet<BasicBlock *, 16> DefiningBlocks(BlocksToProcess.begin(),
+ BlocksToProcess.end());
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ IDFs.calculate(IDFBlocks);
+ for (auto *BBIDF : IDFBlocks) {
+ if (auto *IDFPhi = MSSA->getMemoryAccess(BBIDF)) {
+ // Update existing Phi.
+ // FIXME: some updates may be redundant, try to optimize and skip some.
+ for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
+ IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
+ } else {
+ IDFPhi = MSSA->createMemoryPhi(BBIDF);
+ for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) {
+ BasicBlock *Pi = Pair.second;
+ IDFPhi->addIncoming(GetLastDef(Pi), Pi);
+ }
+ }
+ }
+ }
+
+ // Now for all defs in BlocksWithDefsToReplace, if there are uses they no
+ // longer dominate, replace those with the closest dominating def.
+ // This will also update optimized accesses, as they're also uses.
+ for (auto *BlockWithDefsToReplace : BlocksWithDefsToReplace) {
+ if (auto DefsList = MSSA->getWritableBlockDefs(BlockWithDefsToReplace)) {
+ for (auto &DefToReplaceUses : *DefsList) {
+ BasicBlock *DominatingBlock = DefToReplaceUses.getBlock();
+ Value::use_iterator UI = DefToReplaceUses.use_begin(),
+ E = DefToReplaceUses.use_end();
+ for (; UI != E;) {
+ Use &U = *UI;
+ ++UI;
+ MemoryAccess *Usr = dyn_cast<MemoryAccess>(U.getUser());
+ if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) {
+ BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U);
+ if (!DT.dominates(DominatingBlock, DominatedBlock))
+ U.set(GetLastDef(DominatedBlock));
+ } else {
+ BasicBlock *DominatedBlock = Usr->getBlock();
+ if (!DT.dominates(DominatingBlock, DominatedBlock)) {
+ if (auto *DomBlPhi = MSSA->getMemoryAccess(DominatedBlock))
+ U.set(DomBlPhi);
+ else {
+ auto *IDom = DT.getNode(DominatedBlock)->getIDom();
+ assert(IDom && "Block must have a valid IDom.");
+ U.set(GetLastDef(IDom->getBlock()));
+ }
+ cast<MemoryUseOrDef>(Usr)->resetOptimized();
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
// Move What before Where in the MemorySSA IR.
template <class WhereType>
void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
@@ -498,13 +1015,14 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
}
void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
- BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds) {
+ BasicBlock *Old, BasicBlock *New, ArrayRef<BasicBlock *> Preds,
+ bool IdenticalEdgesWereMerged) {
assert(!MSSA->getWritableBlockAccesses(New) &&
"Access list should be null for a new block.");
MemoryPhi *Phi = MSSA->getMemoryAccess(Old);
if (!Phi)
return;
- if (pred_size(Old) == 1) {
+ if (Old->hasNPredecessors(1)) {
assert(pred_size(New) == Preds.size() &&
"Should have moved all predecessors.");
MSSA->moveTo(Phi, New, MemorySSA::Beginning);
@@ -513,9 +1031,17 @@ void MemorySSAUpdater::wireOldPredecessorsToNewImmediatePredecessor(
"new immediate predecessor.");
MemoryPhi *NewPhi = MSSA->createMemoryPhi(New);
SmallPtrSet<BasicBlock *, 16> PredsSet(Preds.begin(), Preds.end());
+ // Currently only support the case of removing a single incoming edge when
+ // identical edges were not merged.
+ if (!IdenticalEdgesWereMerged)
+ assert(PredsSet.size() == Preds.size() &&
+ "If identical edges were not merged, we cannot have duplicate "
+ "blocks in the predecessors");
Phi->unorderedDeleteIncomingIf([&](MemoryAccess *MA, BasicBlock *B) {
if (PredsSet.count(B)) {
NewPhi->addIncoming(MA, B);
+ if (!IdenticalEdgesWereMerged)
+ PredsSet.erase(B);
return true;
}
return false;
@@ -578,9 +1104,9 @@ void MemorySSAUpdater::removeBlocks(
const SmallPtrSetImpl<BasicBlock *> &DeadBlocks) {
// First delete all uses of BB in MemoryPhis.
for (BasicBlock *BB : DeadBlocks) {
- TerminatorInst *TI = BB->getTerminator();
+ Instruction *TI = BB->getTerminator();
assert(TI && "Basic block expected to have a terminator instruction");
- for (BasicBlock *Succ : TI->successors())
+ for (BasicBlock *Succ : successors(TI))
if (!DeadBlocks.count(Succ))
if (MemoryPhi *MP = MSSA->getMemoryAccess(Succ)) {
MP->unorderedDeleteIncomingBlock(BB);
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 17dae20ce3a1..87f76d43bb1e 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -74,9 +74,17 @@ cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
-static void findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
+// Return true if any of the operands contains a blockaddress. This is
+// important to know when computing the summary for a global variable, because
+// if a global variable references a basic block address we can't import it
+// separately from the function containing that basic block. For simplicity we
+// currently don't import such global vars at all. When importing a function we
+// don't care whether any instruction in it takes the address of a basic block,
+// because an instruction can only take the address of a basic block located in
+// the same function.
+static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
SetVector<ValueInfo> &RefEdges,
SmallPtrSet<const User *, 8> &Visited) {
+ bool HasBlockAddress = false;
SmallVector<const User *, 32> Worklist;
Worklist.push_back(CurUser);
@@ -92,8 +100,10 @@ static void findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
const User *Operand = dyn_cast<User>(OI);
if (!Operand)
continue;
- if (isa<BlockAddress>(Operand))
+ if (isa<BlockAddress>(Operand)) {
+ HasBlockAddress = true;
continue;
+ }
if (auto *GV = dyn_cast<GlobalValue>(Operand)) {
// We have a reference to a global value. This should be added to
// the reference set unless it is a callee. Callees are handled
@@ -105,6 +115,7 @@ static void findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
Worklist.push_back(Operand);
}
}
+ return HasBlockAddress;
}
static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
@@ -147,7 +158,8 @@ static void addIntrinsicToSummary(
SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls,
SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls,
SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls,
- SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls) {
+ SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls,
+ DominatorTree &DT) {
switch (CI->getCalledFunction()->getIntrinsicID()) {
case Intrinsic::type_test: {
auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
@@ -172,7 +184,7 @@ static void addIntrinsicToSummary(
SmallVector<DevirtCallSite, 4> DevirtCalls;
SmallVector<CallInst *, 4> Assumes;
- findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
for (auto &Call : DevirtCalls)
addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
TypeTestAssumeConstVCalls);
@@ -192,7 +204,7 @@ static void addIntrinsicToSummary(
SmallVector<Instruction *, 4> Preds;
bool HasNonCallUses = false;
findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
- HasNonCallUses, CI);
+ HasNonCallUses, CI, DT);
// Any non-call uses of the result of llvm.type.checked.load will
// prevent us from optimizing away the llvm.type.test.
if (HasNonCallUses)
@@ -208,11 +220,19 @@ static void addIntrinsicToSummary(
}
}
-static void
-computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
- const Function &F, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, bool HasLocalsInUsedOrAsm,
- DenseSet<GlobalValue::GUID> &CantBePromoted) {
+static bool isNonVolatileLoad(const Instruction *I) {
+ if (const auto *LI = dyn_cast<LoadInst>(I))
+ return !LI->isVolatile();
+
+ return false;
+}
+
+static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
+ const Function &F, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, DominatorTree &DT,
+ bool HasLocalsInUsedOrAsm,
+ DenseSet<GlobalValue::GUID> &CantBePromoted,
+ bool IsThinLTO) {
// Summary not currently supported for anonymous functions, they should
// have been named.
assert(F.hasName());
@@ -233,6 +253,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// Add personality function, prefix data and prologue data to function's ref
// list.
findRefEdges(Index, &F, RefEdges, Visited);
+ std::vector<const Instruction *> NonVolatileLoads;
bool HasInlineAsmMaybeReferencingInternal = false;
for (const BasicBlock &BB : F)
@@ -240,6 +261,13 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (isa<DbgInfoIntrinsic>(I))
continue;
++NumInsts;
+ if (isNonVolatileLoad(&I)) {
+ // Postpone processing of non-volatile load instructions
+ // See comments below
+ Visited.insert(&I);
+ NonVolatileLoads.push_back(&I);
+ continue;
+ }
findRefEdges(Index, &I, RefEdges, Visited);
auto CS = ImmutableCallSite(&I);
if (!CS)
@@ -273,7 +301,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
if (CI && CalledFunction->isIntrinsic()) {
addIntrinsicToSummary(
CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
- TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls);
+ TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT);
continue;
}
// We should have named any anonymous globals
@@ -329,6 +357,24 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
}
+  // By now we have processed all instructions in the function except
+  // non-volatile loads. All new refs we add in the loop below are obviously
+  // constant. All constant refs are grouped at the end of the RefEdges
+  // vector, so we can use a single integer value to identify them.
+ unsigned RefCnt = RefEdges.size();
+ for (const Instruction *I : NonVolatileLoads) {
+ Visited.erase(I);
+ findRefEdges(Index, I, RefEdges, Visited);
+ }
+ std::vector<ValueInfo> Refs = RefEdges.takeVector();
+  // A regular LTO module doesn't participate in ThinLTO importing, so no
+  // reference from it can be readonly, since that would require importing
+  // the variable as a local copy.
+ if (IsThinLTO)
+ for (; RefCnt < Refs.size(); ++RefCnt)
+ Refs[RefCnt].setReadOnly();
+
// Explicit add hot edges to enforce importing for designated GUIDs for
// sample PGO, to enable the same inlines as the profiled optimized binary.
for (auto &I : F.getImportGUIDs())
@@ -339,22 +385,18 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
- NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
- // Inliner doesn't handle variadic functions.
- // FIXME: refactor this to use the same code that inliner is using.
- F.isVarArg() ||
- // Don't try to import functions with noinline attribute.
- F.getAttributes().hasFnAttribute(Attribute::NoInline);
+ NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
/* Live = */ false, F.isDSOLocal());
FunctionSummary::FFlags FunFlags{
F.hasFnAttribute(Attribute::ReadNone),
F.hasFnAttribute(Attribute::ReadOnly),
- F.hasFnAttribute(Attribute::NoRecurse),
- F.returnDoesNotAlias(),
- };
+ F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
+ // FIXME: refactor this to use the same code that inliner is using.
+ // Don't try to import functions with noinline attribute.
+ F.getAttributes().hasFnAttribute(Attribute::NoInline)};
auto FuncSummary = llvm::make_unique<FunctionSummary>(
- Flags, NumInsts, FunFlags, RefEdges.takeVector(),
+ Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
CallGraphEdges.takeVector(), TypeTests.takeVector(),
TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
TypeTestAssumeConstVCalls.takeVector(),
@@ -369,14 +411,21 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
- findRefEdges(Index, &V, RefEdges, Visited);
+ bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
/* Live = */ false, V.isDSOLocal());
- auto GVarSummary =
- llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
+
+ // Don't mark variables we won't be able to internalize as read-only.
+ GlobalVarSummary::GVarFlags VarFlags(
+ !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
+ !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass());
+ auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
+ RefEdges.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(V.getGUID());
+ if (HasBlockAddress)
+ GVarSummary->setNotEligibleToImport();
Index.addGlobalValueSummary(V, std::move(GVarSummary));
}
@@ -408,7 +457,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
ProfileSummaryInfo *PSI) {
assert(PSI);
- ModuleSummaryIndex Index(/*HaveGVs=*/true);
+ bool EnableSplitLTOUnit = false;
+ if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("EnableSplitLTOUnit")))
+ EnableSplitLTOUnit = MD->getZExtValue();
+ ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit);
// Identify the local values in the llvm.used and llvm.compiler.used sets,
// which should not be exported as they would then require renaming and
@@ -460,13 +513,15 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
if (Function *F = dyn_cast<Function>(GV)) {
std::unique_ptr<FunctionSummary> Summary =
llvm::make_unique<FunctionSummary>(
- GVFlags, 0,
+ GVFlags, /*InstCount=*/0,
FunctionSummary::FFlags{
F->hasFnAttribute(Attribute::ReadNone),
F->hasFnAttribute(Attribute::ReadOnly),
F->hasFnAttribute(Attribute::NoRecurse),
- F->returnDoesNotAlias()},
- ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{},
+ F->returnDoesNotAlias(),
+ /* NoInline = */ false},
+ /*EntryCount=*/0, ArrayRef<ValueInfo>{},
+ ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
ArrayRef<FunctionSummary::VFuncId>{},
ArrayRef<FunctionSummary::VFuncId>{},
@@ -475,33 +530,40 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
Index.addGlobalValueSummary(*GV, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
- llvm::make_unique<GlobalVarSummary>(GVFlags,
- ArrayRef<ValueInfo>{});
+ llvm::make_unique<GlobalVarSummary>(
+ GVFlags, GlobalVarSummary::GVarFlags(),
+ ArrayRef<ValueInfo>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
}
});
}
+ bool IsThinLTO = true;
+ if (auto *MD =
+ mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
+ IsThinLTO = MD->getZExtValue();
+
// Compute summaries for all functions defined in module, and save in the
// index.
for (auto &F : M) {
if (F.isDeclaration())
continue;
+ DominatorTree DT(const_cast<Function &>(F));
BlockFrequencyInfo *BFI = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFIPtr;
if (GetBFICallback)
BFI = GetBFICallback(F);
else if (F.hasProfileData()) {
- LoopInfo LI{DominatorTree(const_cast<Function &>(F))};
+ LoopInfo LI{DT};
BranchProbabilityInfo BPI{F, LI};
BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
BFI = BFIPtr.get();
}
- computeFunctionSummary(Index, M, F, BFI, PSI,
+ computeFunctionSummary(Index, M, F, BFI, PSI, DT,
!LocalsUsed.empty() || HasLocalInlineAsmSymbol,
- CantBePromoted);
+ CantBePromoted, IsThinLTO);
}
// Compute summaries for all variables defined in module, and save in the
@@ -532,11 +594,6 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
setLiveRoot(Index, "llvm.global_dtors");
setLiveRoot(Index, "llvm.global.annotations");
- bool IsThinLTO = true;
- if (auto *MD =
- mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
- IsThinLTO = MD->getZExtValue();
-
for (auto &GlobalList : Index) {
// Ignore entries for references that are undefined in the current module.
if (GlobalList.second.SummaryList.empty())
@@ -606,7 +663,7 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
}
bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
- auto &PSI = *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
Index.emplace(buildModuleSummaryIndex(
M,
[this](const Function &F) {
@@ -614,7 +671,7 @@ bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
*const_cast<Function *>(&F))
.getBFI());
},
- &PSI));
+ PSI));
return false;
}
diff --git a/lib/Analysis/MustExecute.cpp b/lib/Analysis/MustExecute.cpp
index 8e85366b4618..180c38ddacc2 100644
--- a/lib/Analysis/MustExecute.cpp
+++ b/lib/Analysis/MustExecute.cpp
@@ -22,20 +22,32 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-/// Computes loop safety information, checks loop body & header
-/// for the possibility of may throw exception.
-///
-void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
+const DenseMap<BasicBlock *, ColorVector> &
+LoopSafetyInfo::getBlockColors() const {
+ return BlockColors;
+}
+
+void LoopSafetyInfo::copyColors(BasicBlock *New, BasicBlock *Old) {
+ ColorVector &ColorsForNewBlock = BlockColors[New];
+ ColorVector &ColorsForOldBlock = BlockColors[Old];
+ ColorsForNewBlock = ColorsForOldBlock;
+}
+
+bool SimpleLoopSafetyInfo::blockMayThrow(const BasicBlock *BB) const {
+ (void)BB;
+ return anyBlockMayThrow();
+}
+
+bool SimpleLoopSafetyInfo::anyBlockMayThrow() const {
+ return MayThrow;
+}
+
+void SimpleLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) {
assert(CurLoop != nullptr && "CurLoop can't be null");
BasicBlock *Header = CurLoop->getHeader();
- // Setting default safety values.
- SafetyInfo->MayThrow = false;
- SafetyInfo->HeaderMayThrow = false;
// Iterate over header and compute safety info.
- SafetyInfo->HeaderMayThrow =
- !isGuaranteedToTransferExecutionToSuccessor(Header);
-
- SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
+ HeaderMayThrow = !isGuaranteedToTransferExecutionToSuccessor(Header);
+ MayThrow = HeaderMayThrow;
// Iterate over loop instructions and compute safety info.
// Skip header as it has been computed and stored in HeaderMayThrow.
// The first block in loopinfo.Blocks is guaranteed to be the header.
@@ -43,23 +55,59 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
"First block must be header");
for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
BBE = CurLoop->block_end();
- (BB != BBE) && !SafetyInfo->MayThrow; ++BB)
- SafetyInfo->MayThrow |=
- !isGuaranteedToTransferExecutionToSuccessor(*BB);
+ (BB != BBE) && !MayThrow; ++BB)
+ MayThrow |= !isGuaranteedToTransferExecutionToSuccessor(*BB);
+
+ computeBlockColors(CurLoop);
+}
+bool ICFLoopSafetyInfo::blockMayThrow(const BasicBlock *BB) const {
+ return ICF.hasICF(BB);
+}
+
+bool ICFLoopSafetyInfo::anyBlockMayThrow() const {
+ return MayThrow;
+}
+
+void ICFLoopSafetyInfo::computeLoopSafetyInfo(const Loop *CurLoop) {
+ assert(CurLoop != nullptr && "CurLoop can't be null");
+ ICF.clear();
+ MW.clear();
+ MayThrow = false;
+ // Figure out the fact that at least one block may throw.
+ for (auto &BB : CurLoop->blocks())
+ if (ICF.hasICF(&*BB)) {
+ MayThrow = true;
+ break;
+ }
+ computeBlockColors(CurLoop);
+}
+
+void ICFLoopSafetyInfo::insertInstructionTo(const Instruction *Inst,
+ const BasicBlock *BB) {
+ ICF.insertInstructionTo(Inst, BB);
+ MW.insertInstructionTo(Inst, BB);
+}
+
+void ICFLoopSafetyInfo::removeInstruction(const Instruction *Inst) {
+ ICF.removeInstruction(Inst);
+ MW.removeInstruction(Inst);
+}
+
+void LoopSafetyInfo::computeBlockColors(const Loop *CurLoop) {
// Compute funclet colors if we might sink/hoist in a function with a funclet
// personality routine.
Function *Fn = CurLoop->getHeader()->getParent();
if (Fn->hasPersonalityFn())
if (Constant *PersonalityFn = Fn->getPersonalityFn())
if (isScopedEHPersonality(classifyEHPersonality(PersonalityFn)))
- SafetyInfo->BlockColors = colorEHFunclets(*Fn);
+ BlockColors = colorEHFunclets(*Fn);
}
/// Return true if we can prove that the given ExitBlock is not reached on the
/// first iteration of the given loop. That is, the backedge of the loop must
/// be executed before the ExitBlock is executed in any dynamic execution trace.
-static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
+static bool CanProveNotTakenFirstIteration(const BasicBlock *ExitBlock,
const DominatorTree *DT,
const Loop *CurLoop) {
auto *CondExitBlock = ExitBlock->getSinglePredecessor();
@@ -99,15 +147,94 @@ static bool CanProveNotTakenFirstIteration(BasicBlock *ExitBlock,
return SimpleCst->isAllOnesValue();
}
+/// Collect all blocks from \p CurLoop which lie on all possible paths from
+/// the header of \p CurLoop (inclusive) to BB (exclusive) into the set
+/// \p Predecessors. If \p BB is the header, \p Predecessors will be empty.
+static void collectTransitivePredecessors(
+ const Loop *CurLoop, const BasicBlock *BB,
+ SmallPtrSetImpl<const BasicBlock *> &Predecessors) {
+ assert(Predecessors.empty() && "Garbage in predecessors set?");
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+ if (BB == CurLoop->getHeader())
+ return;
+ SmallVector<const BasicBlock *, 4> WorkList;
+ for (auto *Pred : predecessors(BB)) {
+ Predecessors.insert(Pred);
+ WorkList.push_back(Pred);
+ }
+ while (!WorkList.empty()) {
+ auto *Pred = WorkList.pop_back_val();
+ assert(CurLoop->contains(Pred) && "Should only reach loop blocks!");
+    // We are not interested in backedges and we don't want to leave the loop.
+ if (Pred == CurLoop->getHeader())
+ continue;
+ // TODO: If BB lies in an inner loop of CurLoop, this will traverse over all
+ // blocks of this inner loop, even those that are always executed AFTER the
+    // BB. It may make our analysis more conservative than it could be; see the
+    // tests @nested and @nested_no_throw in
+    // test/Analysis/MustExecute/loop-header.ll. We could ignore the backedges
+    // of all loops containing BB to get a slightly more optimistic result.
+ for (auto *PredPred : predecessors(Pred))
+ if (Predecessors.insert(PredPred).second)
+ WorkList.push_back(PredPred);
+ }
+}
+
+bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop,
+ const BasicBlock *BB,
+ const DominatorTree *DT) const {
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+
+ // Fast path: header is always reached once the loop is entered.
+ if (BB == CurLoop->getHeader())
+ return true;
+
+ // Collect all transitive predecessors of BB in the same loop. This set will
+ // be a subset of the blocks within the loop.
+ SmallPtrSet<const BasicBlock *, 4> Predecessors;
+ collectTransitivePredecessors(CurLoop, BB, Predecessors);
+
+ // Make sure that all successors of all predecessors of BB are either:
+ // 1) BB,
+ // 2) Also predecessors of BB,
+ // 3) Exit blocks which are not taken on 1st iteration.
+ // Memoize blocks we've already checked.
+ SmallPtrSet<const BasicBlock *, 4> CheckedSuccessors;
+ for (auto *Pred : Predecessors) {
+ // Predecessor block may throw, so it has a side exit.
+ if (blockMayThrow(Pred))
+ return false;
+ for (auto *Succ : successors(Pred))
+ if (CheckedSuccessors.insert(Succ).second &&
+ Succ != BB && !Predecessors.count(Succ))
+ // By discharging conditions that are not executed on the 1st iteration,
+ // we guarantee that *at least* on the first iteration all paths from
+ // header that *may* execute will lead us to the block of interest. So
+ // that if we had virtually peeled one iteration away, in this peeled
+ // iteration the set of predecessors would contain only paths from
+ // header to BB without any exiting edges that may execute.
+ //
+ // TODO: We only do it for exiting edges currently. We could use the
+ // same function to skip some of the edges within the loop if we know
+ // that they will not be taken on the 1st iteration.
+ //
+ // TODO: If we somehow know the number of iterations in loop, the same
+ // check may be done for any arbitrary N-th iteration as long as N is
+ // not greater than minimum number of iterations in this loop.
+ if (CurLoop->contains(Succ) ||
+ !CanProveNotTakenFirstIteration(Succ, DT, CurLoop))
+ return false;
+ }
+
+ // All predecessors can only lead us to BB.
+ return true;
+}
+
/// Returns true if the instruction in a loop is guaranteed to execute at least
/// once.
-bool llvm::isGuaranteedToExecute(const Instruction &Inst,
- const DominatorTree *DT, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo) {
- // We have to check to make sure that the instruction dominates all
- // of the exit blocks. If it doesn't, then there is a path out of the loop
- // which does not execute this instruction, so we can't hoist it.
-
+bool SimpleLoopSafetyInfo::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const {
// If the instruction is in the header block for the loop (which is very
// common), it is always guaranteed to dominate the exit blocks. Since this
// is a common case, and can save some work, check it now.
@@ -116,52 +243,48 @@ bool llvm::isGuaranteedToExecute(const Instruction &Inst,
// Inst unless we can prove that Inst comes before the potential implicit
// exit. At the moment, we use a (cheap) hack for the common case where
// the instruction of interest is the first one in the block.
- return !SafetyInfo->HeaderMayThrow ||
- Inst.getParent()->getFirstNonPHIOrDbg() == &Inst;
+ return !HeaderMayThrow ||
+ Inst.getParent()->getFirstNonPHIOrDbg() == &Inst;
- // Somewhere in this loop there is an instruction which may throw and make us
- // exit the loop.
- if (SafetyInfo->MayThrow)
- return false;
+ // If there is a path from header to exit or latch that doesn't lead to our
+ // instruction's block, return false.
+ return allLoopPathsLeadToBlock(CurLoop, Inst.getParent(), DT);
+}
- // Note: There are two styles of reasoning intermixed below for
- // implementation efficiency reasons. They are:
- // 1) If we can prove that the instruction dominates all exit blocks, then we
- // know the instruction must have executed on *some* iteration before we
- // exit. We do not prove *which* iteration the instruction must execute on.
- // 2) If we can prove that the instruction dominates the latch and all exits
- // which might be taken on the first iteration, we know the instruction must
- // execute on the first iteration. This second style allows a conditional
- // exit before the instruction of interest which is provably not taken on the
- // first iteration. This is a quite common case for range check like
- // patterns. TODO: support loops with multiple latches.
-
- const bool InstDominatesLatch =
- CurLoop->getLoopLatch() != nullptr &&
- DT->dominates(Inst.getParent(), CurLoop->getLoopLatch());
-
- // Get the exit blocks for the current loop.
- SmallVector<BasicBlock *, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
-
- // Verify that the block dominates each of the exit blocks of the loop.
- for (BasicBlock *ExitBlock : ExitBlocks)
- if (!DT->dominates(Inst.getParent(), ExitBlock))
- if (!InstDominatesLatch ||
- !CanProveNotTakenFirstIteration(ExitBlock, DT, CurLoop))
- return false;
-
- // As a degenerate case, if the loop is statically infinite then we haven't
- // proven anything since there are no exit blocks.
- if (ExitBlocks.empty())
- return false;
+bool ICFLoopSafetyInfo::isGuaranteedToExecute(const Instruction &Inst,
+ const DominatorTree *DT,
+ const Loop *CurLoop) const {
+ return !ICF.isDominatedByICFIFromSameBlock(&Inst) &&
+ allLoopPathsLeadToBlock(CurLoop, Inst.getParent(), DT);
+}
+
+bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const BasicBlock *BB,
+ const Loop *CurLoop) const {
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
- // FIXME: In general, we have to prove that the loop isn't an infinite loop.
- // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
- // just a special case of this.)
+ // Fast path: there are no instructions before header.
+ if (BB == CurLoop->getHeader())
+ return true;
+
+ // Collect all transitive predecessors of BB in the same loop. This set will
+ // be a subset of the blocks within the loop.
+ SmallPtrSet<const BasicBlock *, 4> Predecessors;
+ collectTransitivePredecessors(CurLoop, BB, Predecessors);
+  // Check whether there is any instruction in any of the predecessors that
+  // could write to memory.
+ for (auto *Pred : Predecessors)
+ if (MW.mayWriteToMemory(Pred))
+ return false;
return true;
}
+bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const Instruction &I,
+ const Loop *CurLoop) const {
+ auto *BB = I.getParent();
+ assert(CurLoop->contains(BB) && "Should only be called for loop blocks!");
+ return !MW.isDominatedByMemoryWriteFromSameBlock(&I) &&
+ doesNotWriteMemoryBefore(BB, CurLoop);
+}
namespace {
struct MustExecutePrinter : public FunctionPass {
@@ -195,9 +318,9 @@ static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) {
// TODO: merge these two routines. For the moment, we display the best
// result obtained by *either* implementation. This is a bit unfair since no
// caller actually gets the full power at the moment.
- LoopSafetyInfo LSI;
- computeLoopSafetyInfo(&LSI, L);
- return isGuaranteedToExecute(I, DT, L, &LSI) ||
+ SimpleLoopSafetyInfo LSI;
+ LSI.computeLoopSafetyInfo(L);
+ return LSI.isGuaranteedToExecute(I, DT, L) ||
isGuaranteedToExecuteForEveryIteration(&I, L);
}
diff --git a/lib/Analysis/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 096ea661ecb6..95ae1a6e744f 100644
--- a/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -106,12 +106,12 @@ FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) {
return AAResultBase::getModRefBehavior(F);
}
-ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo ObjCARCAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
if (!EnableARCOpts)
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
- switch (GetBasicARCInstKind(CS.getInstruction())) {
+ switch (GetBasicARCInstKind(Call)) {
case ARCInstKind::Retain:
case ARCInstKind::RetainRV:
case ARCInstKind::Autorelease:
@@ -128,7 +128,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
break;
}
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp
index f268e2a9abdd..31c432711834 100644
--- a/lib/Analysis/ObjCARCInstKind.cpp
+++ b/lib/Analysis/ObjCARCInstKind.cpp
@@ -85,97 +85,73 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
}
ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
- Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- // No (mandatory) arguments.
- if (AI == AE)
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush)
- .Case("clang.arc.use", ARCInstKind::IntrinsicUser)
- .Default(ARCInstKind::CallOrUser);
-
- // One argument.
- const Argument *A0 = &*AI++;
- if (AI == AE) {
- // Argument is a pointer.
- PointerType *PTy = dyn_cast<PointerType>(A0->getType());
- if (!PTy)
- return ARCInstKind::CallOrUser;
-
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_retain", ARCInstKind::Retain)
- .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
- .Case("objc_unsafeClaimAutoreleasedReturnValue", ARCInstKind::ClaimRV)
- .Case("objc_retainBlock", ARCInstKind::RetainBlock)
- .Case("objc_release", ARCInstKind::Release)
- .Case("objc_autorelease", ARCInstKind::Autorelease)
- .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
- .Case("objc_retainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
- .Case("objc_retain_autorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",
- ARCInstKind::FusedRetainAutoreleaseRV)
- .Case("objc_sync_enter", ARCInstKind::User)
- .Case("objc_sync_exit", ARCInstKind::User)
- .Default(ARCInstKind::CallOrUser);
-
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
- .Case("objc_loadWeak", ARCInstKind::LoadWeak)
- .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
- .Default(ARCInstKind::CallOrUser);
-
- // Anything else with one argument.
+ Intrinsic::ID ID = F->getIntrinsicID();
+ switch (ID) {
+ default:
return ARCInstKind::CallOrUser;
+ case Intrinsic::objc_autorelease:
+ return ARCInstKind::Autorelease;
+ case Intrinsic::objc_autoreleasePoolPop:
+ return ARCInstKind::AutoreleasepoolPop;
+ case Intrinsic::objc_autoreleasePoolPush:
+ return ARCInstKind::AutoreleasepoolPush;
+ case Intrinsic::objc_autoreleaseReturnValue:
+ return ARCInstKind::AutoreleaseRV;
+ case Intrinsic::objc_copyWeak:
+ return ARCInstKind::CopyWeak;
+ case Intrinsic::objc_destroyWeak:
+ return ARCInstKind::DestroyWeak;
+ case Intrinsic::objc_initWeak:
+ return ARCInstKind::InitWeak;
+ case Intrinsic::objc_loadWeak:
+ return ARCInstKind::LoadWeak;
+ case Intrinsic::objc_loadWeakRetained:
+ return ARCInstKind::LoadWeakRetained;
+ case Intrinsic::objc_moveWeak:
+ return ARCInstKind::MoveWeak;
+ case Intrinsic::objc_release:
+ return ARCInstKind::Release;
+ case Intrinsic::objc_retain:
+ return ARCInstKind::Retain;
+ case Intrinsic::objc_retainAutorelease:
+ return ARCInstKind::FusedRetainAutorelease;
+ case Intrinsic::objc_retainAutoreleaseReturnValue:
+ return ARCInstKind::FusedRetainAutoreleaseRV;
+ case Intrinsic::objc_retainAutoreleasedReturnValue:
+ return ARCInstKind::RetainRV;
+ case Intrinsic::objc_retainBlock:
+ return ARCInstKind::RetainBlock;
+ case Intrinsic::objc_storeStrong:
+ return ARCInstKind::StoreStrong;
+ case Intrinsic::objc_storeWeak:
+ return ARCInstKind::StoreWeak;
+ case Intrinsic::objc_clang_arc_use:
+ return ARCInstKind::IntrinsicUser;
+ case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
+ return ARCInstKind::ClaimRV;
+ case Intrinsic::objc_retainedObject:
+ return ARCInstKind::NoopCast;
+ case Intrinsic::objc_unretainedObject:
+ return ARCInstKind::NoopCast;
+ case Intrinsic::objc_unretainedPointer:
+ return ARCInstKind::NoopCast;
+ case Intrinsic::objc_retain_autorelease:
+ return ARCInstKind::FusedRetainAutorelease;
+ case Intrinsic::objc_sync_enter:
+ return ARCInstKind::User;
+ case Intrinsic::objc_sync_exit:
+ return ARCInstKind::User;
+ case Intrinsic::objc_arc_annotation_topdown_bbstart:
+ case Intrinsic::objc_arc_annotation_topdown_bbend:
+ case Intrinsic::objc_arc_annotation_bottomup_bbstart:
+ case Intrinsic::objc_arc_annotation_bottomup_bbend:
+ // Ignore annotation calls. This is important to stop the
+ // optimizer from treating annotations as uses which would
+ // make the state of the pointers they are attempting to
+ // elucidate to be incorrect.
+ return ARCInstKind::None;
}
-
- // Two arguments, first is i8**.
- const Argument *A1 = &*AI++;
- if (AI == AE)
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
- if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
- if (Pte->getElementType()->isIntegerTy(8))
- if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
- Type *ETy1 = PTy1->getElementType();
- // Second argument is i8*
- if (ETy1->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_storeWeak", ARCInstKind::StoreWeak)
- .Case("objc_initWeak", ARCInstKind::InitWeak)
- .Case("objc_storeStrong", ARCInstKind::StoreStrong)
- .Default(ARCInstKind::CallOrUser);
- // Second argument is i8**.
- if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
- if (Pte1->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_moveWeak", ARCInstKind::MoveWeak)
- .Case("objc_copyWeak", ARCInstKind::CopyWeak)
- // Ignore annotation calls. This is important to stop the
- // optimizer from treating annotations as uses which would
- // make the state of the pointers they are attempting to
- // elucidate to be incorrect.
- .Case("llvm.arc.annotation.topdown.bbstart",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.topdown.bbend",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.bottomup.bbstart",
- ARCInstKind::None)
- .Case("llvm.arc.annotation.bottomup.bbend",
- ARCInstKind::None)
- .Default(ARCInstKind::CallOrUser);
- }
-
- // Anything else.
- return ARCInstKind::CallOrUser;
}
// A whitelist of intrinsics that we know do not use objc pointers or decrement
diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp
index 6c47651eae9e..5f4fe0f7dda2 100644
--- a/lib/Analysis/OrderedBasicBlock.cpp
+++ b/lib/Analysis/OrderedBasicBlock.cpp
@@ -37,6 +37,8 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
const Instruction *Inst = nullptr;
assert(!(LastInstFound == BB->end() && NextInstPos != 0) &&
"Instruction supposed to be in NumberedInsts");
+ assert(A->getParent() == BB && "Instruction supposed to be in the block!");
+ assert(B->getParent() == BB && "Instruction supposed to be in the block!");
// Start the search with the instruction found in the last lookup round.
auto II = BB->begin();
@@ -65,6 +67,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) {
assert(A->getParent() == B->getParent() &&
"Instructions must be in the same basic block!");
+ assert(A->getParent() == BB && "Instructions must be in the tracked block!");
// First we lookup the instructions. If they don't exist, lookup will give us
// back ::end(). If they both exist, we compare the numbers. Otherwise, if NA
diff --git a/lib/Analysis/OrderedInstructions.cpp b/lib/Analysis/OrderedInstructions.cpp
new file mode 100644
index 000000000000..7b155208c02e
--- /dev/null
+++ b/lib/Analysis/OrderedInstructions.cpp
@@ -0,0 +1,51 @@
+//===-- OrderedInstructions.cpp - Instruction dominance function ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a utility to check the dominance relation of 2 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/OrderedInstructions.h"
+using namespace llvm;
+
+bool OrderedInstructions::localDominates(const Instruction *InstA,
+ const Instruction *InstB) const {
+ assert(InstA->getParent() == InstB->getParent() &&
+ "Instructions must be in the same basic block");
+
+ const BasicBlock *IBB = InstA->getParent();
+ auto OBB = OBBMap.find(IBB);
+ if (OBB == OBBMap.end())
+ OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first;
+ return OBB->second->dominates(InstA, InstB);
+}
+
+/// Given 2 instructions, use OrderedBasicBlock to check for the dominance
+/// relation if the instructions are in the same basic block; otherwise, use
+/// the dominator tree.
+bool OrderedInstructions::dominates(const Instruction *InstA,
+ const Instruction *InstB) const {
+ // Use ordered basic block to do dominance check in case the 2 instructions
+ // are in the same basic block.
+ if (InstA->getParent() == InstB->getParent())
+ return localDominates(InstA, InstB);
+ return DT->dominates(InstA->getParent(), InstB->getParent());
+}
+
+bool OrderedInstructions::dfsBefore(const Instruction *InstA,
+ const Instruction *InstB) const {
+ // Use ordered basic block in case the 2 instructions are in the same basic
+ // block.
+ if (InstA->getParent() == InstB->getParent())
+ return localDominates(InstA, InstB);
+
+ DomTreeNode *DA = DT->getNode(InstA->getParent());
+ DomTreeNode *DB = DT->getNode(InstB->getParent());
+ return DA->getDFSNumIn() < DB->getDFSNumIn();
+}
diff --git a/lib/Analysis/PhiValues.cpp b/lib/Analysis/PhiValues.cpp
index ef121815d2cf..729227c86697 100644
--- a/lib/Analysis/PhiValues.cpp
+++ b/lib/Analysis/PhiValues.cpp
@@ -14,6 +14,16 @@
using namespace llvm;
+void PhiValues::PhiValuesCallbackVH::deleted() {
+ PV->invalidateValue(getValPtr());
+}
+
+void PhiValues::PhiValuesCallbackVH::allUsesReplacedWith(Value *) {
+ // We could potentially update the cached values we have with the new value,
+ // but it's simpler to just treat the old value as invalidated.
+ PV->invalidateValue(getValPtr());
+}
+
bool PhiValues::invalidate(Function &, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
// PhiValues is invalidated if it isn't preserved.
@@ -46,6 +56,7 @@ void PhiValues::processPhi(const PHINode *Phi,
DepthMap[Phi] = DepthNumber;
// Recursively process the incoming phis of this phi.
+ TrackedValues.insert(PhiValuesCallbackVH(const_cast<PHINode *>(Phi), this));
for (Value *PhiOp : Phi->incoming_values()) {
if (PHINode *PhiPhiOp = dyn_cast<PHINode>(PhiOp)) {
// Recurse if the phi has not yet been visited.
@@ -56,6 +67,8 @@ void PhiValues::processPhi(const PHINode *Phi,
// phi are part of the same component, so adjust the depth number.
if (!ReachableMap.count(DepthMap[PhiPhiOp]))
DepthMap[Phi] = std::min(DepthMap[Phi], DepthMap[PhiPhiOp]);
+ } else {
+ TrackedValues.insert(PhiValuesCallbackVH(PhiOp, this));
}
}
@@ -122,6 +135,10 @@ void PhiValues::invalidateValue(const Value *V) {
NonPhiReachableMap.erase(N);
ReachableMap.erase(N);
}
+ // This value is no longer tracked
+ auto It = TrackedValues.find_as(V);
+ if (It != TrackedValues.end())
+ TrackedValues.erase(It);
}
void PhiValues::releaseMemory() {
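
The PhiValuesCallbackVH added above follows LLVM's CallbackVH pattern, in which a value handle notifies its owner when the underlying Value is deleted or has all uses replaced. A minimal stand-alone sketch of that pattern follows; the Cache owner class is hypothetical and is not the PhiValues code itself.

// Illustrative CallbackVH sketch; the Cache class is hypothetical.
#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/ValueHandle.h"
#include <list>

namespace {
class Cache;

// Notifies the owning cache when the tracked value dies or is replaced.
class CacheVH final : public llvm::CallbackVH {
  Cache *Owner;
  void deleted() override;
  void allUsesReplacedWith(llvm::Value *) override;

public:
  CacheVH(llvm::Value *V, Cache *C) : CallbackVH(V), Owner(C) {}
};

class Cache {
  llvm::DenseSet<llvm::Value *> Known;
  std::list<CacheVH> Handles; // keeps the callbacks registered

public:
  void remember(llvm::Value *V) {
    if (Known.insert(V).second)
      Handles.emplace_back(V, this);
  }
  // Called from the handle; a real implementation would also drop the handle.
  void forget(llvm::Value *V) { Known.erase(V); }
};

void CacheVH::deleted() { Owner->forget(getValPtr()); }
void CacheVH::allUsesReplacedWith(llvm::Value *) { Owner->forget(getValPtr()); }
} // namespace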
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index fb591f5d6a69..1d70c75f2e1c 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -39,11 +39,6 @@ static cl::opt<int> ProfileSummaryCutoffCold(
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
-static cl::opt<bool> ProfileSampleAccurate(
- "profile-sample-accurate", cl::Hidden, cl::init(false),
- cl::desc("If the sample profile is accurate, we will mark all un-sampled "
- "callsite as cold. Otherwise, treat un-sampled callsites as if "
- "we have no profile."));
static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
cl::init(15000), cl::ZeroOrMore,
@@ -51,6 +46,18 @@ static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
" blocks required to reach the -profile-summary-cutoff-hot"
" percentile exceeds this count."));
+// The next two options override the counts derived from summary computation and
+// are useful for debugging purposes.
+static cl::opt<int> ProfileSummaryHotCount(
+ "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
+ cl::desc("A fixed hot count that overrides the count derived from"
+ " profile-summary-cutoff-hot"));
+
+static cl::opt<int> ProfileSummaryColdCount(
+ "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
+ cl::desc("A fixed cold count that overrides the count derived from"
+ " profile-summary-cutoff-cold"));
+
// Find the summary entry for a desired percentile of counts.
static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
uint64_t Percentile) {
@@ -139,7 +146,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F,
return true;
}
for (const auto &BB : *F)
- if (isHotBB(&BB, &BFI))
+ if (isHotBlock(&BB, &BFI))
return true;
return false;
}
@@ -168,7 +175,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
return false;
}
for (const auto &BB : *F)
- if (!isColdBB(&BB, &BFI))
+ if (!isColdBlock(&BB, &BFI))
return false;
return true;
}
@@ -198,9 +205,15 @@ void ProfileSummaryInfo::computeThresholds() {
auto &HotEntry =
getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffHot);
HotCountThreshold = HotEntry.MinCount;
+ if (ProfileSummaryHotCount.getNumOccurrences() > 0)
+ HotCountThreshold = ProfileSummaryHotCount;
auto &ColdEntry =
getEntryForPercentile(DetailedSummary, ProfileSummaryCutoffCold);
ColdCountThreshold = ColdEntry.MinCount;
+ if (ProfileSummaryColdCount.getNumOccurrences() > 0)
+ ColdCountThreshold = ProfileSummaryColdCount;
+ assert(ColdCountThreshold <= HotCountThreshold &&
+ "Cold count threshold cannot exceed hot count threshold!");
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
}
@@ -226,23 +239,23 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() {
if (!HotCountThreshold)
computeThresholds();
- return HotCountThreshold && HotCountThreshold.getValue();
+ return HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX;
}
uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() {
if (!ColdCountThreshold)
computeThresholds();
- return ColdCountThreshold && ColdCountThreshold.getValue();
+ return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
}
-bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
- auto Count = BFI->getBlockProfileCount(B);
+bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(BB);
return Count && isHotCount(*Count);
}
-bool ProfileSummaryInfo::isColdBB(const BasicBlock *B,
+bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
BlockFrequencyInfo *BFI) {
- auto Count = BFI->getBlockProfileCount(B);
+ auto Count = BFI->getBlockProfileCount(BB);
return Count && isColdCount(*Count);
}
@@ -260,11 +273,7 @@ bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
// In SamplePGO, if the caller has been sampled, and there is no profile
// annotated on the callsite, we consider the callsite as cold.
- // If there is no profile for the caller, and we know the profile is
- // accurate, we consider the callsite as cold.
- return (hasSampleProfile() &&
- (CS.getCaller()->hasProfileData() || ProfileSampleAccurate ||
- CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
+ return hasSampleProfile() && CS.getCaller()->hasProfileData();
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
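
The getOrCompHotCountThreshold/getOrCompColdCountThreshold change above matters because the old expression `Threshold && Threshold.getValue()` produces a bool, so callers received 0 or 1 instead of the real count; the new -profile-summary-hot-count and -profile-summary-cold-count flags then let that count be overridden for debugging. A tiny stand-alone illustration of the bool-collapse, with a made-up threshold value:

// Demonstrates why `Threshold && Threshold.getValue()` was the wrong return.
// The value 12500 is made up for illustration.
#include "llvm/ADT/Optional.h"
#include <cassert>
#include <cstdint>

int main() {
  llvm::Optional<uint64_t> HotCountThreshold = 12500;
  // Old form: && yields a bool, which widens to 1, losing the count.
  uint64_t Old = HotCountThreshold && HotCountThreshold.getValue();
  // New form: return the actual count, or a conservative default when unset.
  uint64_t New = HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX;
  assert(Old == 1 && New == 12500);
  return 0;
}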
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index ed17df2e7e93..a101ff109199 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/OptBisect.h"
+#include "llvm/IR/PassTimingInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 0e715b8814ff..e5134f2eeda9 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -112,6 +112,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -162,6 +163,11 @@ static cl::opt<bool>
cl::desc("Verify no dangling value in ScalarEvolution's "
"ExprValueMap (slow)"));
+static cl::opt<bool> VerifyIR(
+ "scev-verify-ir", cl::Hidden,
+ cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"),
+ cl::init(false));
+
static cl::opt<unsigned> MulOpsInlineThreshold(
"scev-mulops-inline-threshold", cl::Hidden,
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
@@ -204,7 +210,7 @@ static cl::opt<unsigned>
static cl::opt<unsigned>
MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
cl::desc("Max coefficients in AddRec during evolving"),
- cl::init(16));
+ cl::init(8));
//===----------------------------------------------------------------------===//
// SCEV class definitions
@@ -692,10 +698,6 @@ static int CompareSCEVComplexity(
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
- // Compare NoWrap flags.
- if (LA->getNoWrapFlags() != RA->getNoWrapFlags())
- return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags();
-
// Lexicographically compare.
for (unsigned i = 0; i != LNumOps; ++i) {
int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
@@ -720,10 +722,6 @@ static int CompareSCEVComplexity(
if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
- // Compare NoWrap flags.
- if (LC->getNoWrapFlags() != RC->getNoWrapFlags())
- return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags();
-
for (unsigned i = 0; i != LNumOps; ++i) {
int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI,
LC->getOperand(i), RC->getOperand(i), DT,
@@ -2767,6 +2765,29 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
+ScalarEvolution::getOrCreateAddRecExpr(SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L, SCEV::NoWrapFlags Flags) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ SCEVAddRecExpr *S =
+ static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ if (!S) {
+ const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+ std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+ S = new (SCEVAllocator)
+ SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
+ UniqueSCEVs.InsertNode(S, IP);
+ addToLoopUseLists(S);
+ }
+ S->setNoWrapFlags(Flags);
+ return S;
+}
+
+const SCEV *
ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
@@ -3045,7 +3066,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SmallVector<const SCEV*, 7> AddRecOps;
for (int x = 0, xe = AddRec->getNumOperands() +
OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
- const SCEV *Term = getZero(Ty);
+ SmallVector <const SCEV *, 7> SumOps;
for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
@@ -3060,12 +3081,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEV *CoeffTerm = getConstant(Ty, Coeff);
const SCEV *Term1 = AddRec->getOperand(y-z);
const SCEV *Term2 = OtherAddRec->getOperand(z);
- Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2,
- SCEV::FlagAnyWrap, Depth + 1),
- SCEV::FlagAnyWrap, Depth + 1);
+ SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2,
+ SCEV::FlagAnyWrap, Depth + 1));
}
}
- AddRecOps.push_back(Term);
+ if (SumOps.empty())
+ SumOps.push_back(getZero(Ty));
+ AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}
if (!Overflow) {
const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
@@ -3416,24 +3438,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// Okay, it looks like we really DO need an addrec expr. Check to see if we
// already have one, otherwise create a new one.
- FoldingSetNodeID ID;
- ID.AddInteger(scAddRecExpr);
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- ID.AddPointer(Operands[i]);
- ID.AddPointer(L);
- void *IP = nullptr;
- SCEVAddRecExpr *S =
- static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
- if (!S) {
- const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size());
- std::uninitialized_copy(Operands.begin(), Operands.end(), O);
- S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator),
- O, Operands.size(), L);
- UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
- }
- S->setNoWrapFlags(Flags);
- return S;
+ return getOrCreateAddRecExpr(Operands, L, Flags);
}
const SCEV *
@@ -7080,7 +7085,7 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
return getCouldNotCompute();
bool IsOnlyExit = (L->getExitingBlock() != nullptr);
- TerminatorInst *Term = ExitingBlock->getTerminator();
+ Instruction *Term = ExitingBlock->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
@@ -8344,69 +8349,273 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}
-/// Find the roots of the quadratic equation for the given quadratic chrec
-/// {L,+,M,+,N}. This returns either the two roots (which might be the same) or
-/// two SCEVCouldNotCompute objects.
-static Optional<std::pair<const SCEVConstant *,const SCEVConstant *>>
-SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+/// For a given quadratic addrec, generate coefficients of the corresponding
+/// quadratic equation, multiplied by a common value to ensure that they are
+/// integers.
+/// The returned value is a tuple { A, B, C, M, BitWidth }, where
+/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C
+/// were multiplied by, and BitWidth is the bit width of the original addrec
+/// coefficients.
+/// This function returns None if the addrec coefficients are not compile-
+/// time constants.
+static Optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>>
+GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
+ LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: "
+ << *AddRec << '\n');
// We currently can only solve this if the coefficients are constants.
- if (!LC || !MC || !NC)
+ if (!LC || !MC || !NC) {
+ LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n");
return None;
+ }
- uint32_t BitWidth = LC->getAPInt().getBitWidth();
- const APInt &L = LC->getAPInt();
- const APInt &M = MC->getAPInt();
- const APInt &N = NC->getAPInt();
- APInt Two(BitWidth, 2);
-
- // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
+ APInt L = LC->getAPInt();
+ APInt M = MC->getAPInt();
+ APInt N = NC->getAPInt();
+ assert(!N.isNullValue() && "This is not a quadratic addrec");
+
+ unsigned BitWidth = LC->getAPInt().getBitWidth();
+ unsigned NewWidth = BitWidth + 1;
+ LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: "
+ << BitWidth << '\n');
+ // The sign-extension (as opposed to a zero-extension) here matches the
+ // extension used in SolveQuadraticEquationWrap (with the same motivation).
+ N = N.sext(NewWidth);
+ M = M.sext(NewWidth);
+ L = L.sext(NewWidth);
+
+ // The increments are M, M+N, M+2N, ..., so the accumulated values are
+ // L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is,
+ // L+M, L+2M+N, L+3M+3N, ...
+ // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N.
+ //
+ // The equation Acc = 0 is then
+ // L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0.
+ // In a quadratic form it becomes:
+ // N n^2 + (2M-N) n + 2L = 0.
+
+ APInt A = N;
+ APInt B = 2 * M - A;
+ APInt C = 2 * L;
+ APInt T = APInt(NewWidth, 2);
+ LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B
+ << "x + " << C << ", coeff bw: " << NewWidth
+ << ", multiplied by " << T << '\n');
+ return std::make_tuple(A, B, C, T, BitWidth);
+}
+
+/// Helper function to compare optional APInts:
+/// (a) if X and Y both exist, return min(X, Y),
+/// (b) if neither X nor Y exist, return None,
+/// (c) if exactly one of X and Y exists, return that value.
+static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
+ if (X.hasValue() && Y.hasValue()) {
+ unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
+ APInt XW = X->sextOrSelf(W);
+ APInt YW = Y->sextOrSelf(W);
+ return XW.slt(YW) ? *X : *Y;
+ }
+ if (!X.hasValue() && !Y.hasValue())
+ return None;
+ return X.hasValue() ? *X : *Y;
+}
- // The A coefficient is N/2
- APInt A = N.sdiv(Two);
+/// Helper function to truncate an optional APInt to a given BitWidth.
+/// When solving addrec-related equations, it is preferable to return a value
+/// that has the same bit width as the original addrec's coefficients. If the
+/// solution fits in the original bit width, truncate it (except for i1).
+/// Returning a value of a different bit width may inhibit some optimizations.
+///
+/// In general, a solution to a quadratic equation generated from an addrec
+/// may require BW+1 bits, where BW is the bit width of the addrec's
+/// coefficients. The reason is that the coefficients of the quadratic
+/// equation are BW+1 bits wide (to avoid truncation when converting from
+/// the addrec to the equation).
+static Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) {
+ if (!X.hasValue())
+ return None;
+ unsigned W = X->getBitWidth();
+ if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth))
+ return X->trunc(BitWidth);
+ return X;
+}
+
+/// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n
+/// iterations. The values L, M, N are assumed to be signed, and they
+/// should all have the same bit width.
+/// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW,
+/// where BW is the bit width of the addrec's coefficients.
+/// If the calculated value is a BW-bit integer (for BW > 1), it will be
+/// returned as such, otherwise the bit width of the returned value may
+/// be greater than BW.
+///
+/// This function returns None if
+/// (a) the addrec coefficients are not constant, or
+/// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases
+/// like x^2 = 5, no integer solutions exist, in other cases an integer
+/// solution may exist, but SolveQuadraticEquationWrap may fail to find it.
+static Optional<APInt>
+SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
+ APInt A, B, C, M;
+ unsigned BitWidth;
+ auto T = GetQuadraticEquation(AddRec);
+ if (!T.hasValue())
+ return None;
- // The B coefficient is M-N/2
- APInt B = M;
- B -= A; // A is the same as N/2.
+ std::tie(A, B, C, M, BitWidth) = *T;
+ LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n");
+ Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1);
+ if (!X.hasValue())
+ return None;
- // The C coefficient is L.
- const APInt& C = L;
+ ConstantInt *CX = ConstantInt::get(SE.getContext(), *X);
+ ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE);
+ if (!V->isZero())
+ return None;
- // Compute the B^2-4ac term.
- APInt SqrtTerm = B;
- SqrtTerm *= B;
- SqrtTerm -= 4 * (A * C);
+ return TruncIfPossible(X, BitWidth);
+}
- if (SqrtTerm.isNegative()) {
- // The loop is provably infinite.
+/// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n
+/// iterations. The values M, N are assumed to be signed, and they
+/// should both have the same bit width.
+/// Find the least n such that c(n) does not belong to the given range,
+/// while c(n-1) does.
+///
+/// This function returns None if
+/// (a) the addrec coefficients are not constant, or
+/// (b) SolveQuadraticEquationWrap was unable to find a solution for the
+/// bounds of the range.
+static Optional<APInt>
+SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
+ const ConstantRange &Range, ScalarEvolution &SE) {
+ assert(AddRec->getOperand(0)->isZero() &&
+ "Starting value of addrec should be 0");
+ LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range "
+ << Range << ", addrec " << *AddRec << '\n');
+ // This case is handled in getNumIterationsInRange. Here we can assume that
+ // we start in the range.
+ assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) &&
+ "Addrec's initial value should be in range");
+
+ APInt A, B, C, M;
+ unsigned BitWidth;
+ auto T = GetQuadraticEquation(AddRec);
+ if (!T.hasValue())
return None;
- }
- // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
- // integer value or else APInt::sqrt() will assert.
- APInt SqrtVal = SqrtTerm.sqrt();
+ // Be careful about the return value: there can be two reasons for not
+ // returning an actual number. First, if no solutions to the equations
+ // were found, and second, if the solutions don't leave the given range.
+ // The first case means that the actual solution is "unknown", the second
+ // means that it's known, but not valid. If the solution is unknown, we
+ // cannot make any conclusions.
+ // Return a pair: the optional solution and a flag indicating if the
+ // solution was found.
+ auto SolveForBoundary = [&](APInt Bound) -> std::pair<Optional<APInt>,bool> {
+ // Solve for signed overflow and unsigned overflow, pick the lower
+ // solution.
+ LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary "
+ << Bound << " (before multiplying by " << M << ")\n");
+ Bound *= M; // The quadratic equation multiplier.
+
+ Optional<APInt> SO = None;
+ if (BitWidth > 1) {
+ LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
+ "signed overflow\n");
+ SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth);
+ }
+ LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
+ "unsigned overflow\n");
+ Optional<APInt> UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound,
+ BitWidth+1);
+
+ auto LeavesRange = [&] (const APInt &X) {
+ ConstantInt *C0 = ConstantInt::get(SE.getContext(), X);
+ ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE);
+ if (Range.contains(V0->getValue()))
+ return false;
+ // X should be at least 1, so X-1 is non-negative.
+ ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1);
+ ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE);
+ if (Range.contains(V1->getValue()))
+ return true;
+ return false;
+ };
- // Compute the two solutions for the quadratic formula.
- // The divisions must be performed as signed divisions.
- APInt NegB = -std::move(B);
- APInt TwoA = std::move(A);
- TwoA <<= 1;
- if (TwoA.isNullValue())
- return None;
+ // If SolveQuadraticEquationWrap returns None, it means that there can
+ // be a solution, but the function failed to find it. We cannot treat it
+ // as "no solution".
+ if (!SO.hasValue() || !UO.hasValue())
+ return { None, false };
+
+ // Check the smaller value first to see if it leaves the range.
+ // At this point, both SO and UO must have values.
+ Optional<APInt> Min = MinOptional(SO, UO);
+ if (LeavesRange(*Min))
+ return { Min, true };
+ Optional<APInt> Max = Min == SO ? UO : SO;
+ if (LeavesRange(*Max))
+ return { Max, true };
+
+ // Solutions were found, but were eliminated, hence the "true".
+ return { None, true };
+ };
- LLVMContext &Context = SE.getContext();
+ std::tie(A, B, C, M, BitWidth) = *T;
+ // The lower bound is inclusive; subtract 1 to represent the exiting value.
+ APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
+ APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
+ auto SL = SolveForBoundary(Lower);
+ auto SU = SolveForBoundary(Upper);
+ // If any of the solutions was unknown, no meaningful conclusions can
+ // be made.
+ if (!SL.second || !SU.second)
+ return None;
- ConstantInt *Solution1 =
- ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA));
- ConstantInt *Solution2 =
- ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA));
+ // Claim: The correct solution is not some value between Min and Max.
+ //
+ // Justification: Assuming that Min and Max are different values, one of
+ // them is when the first signed overflow happens, the other is when the
+ // first unsigned overflow happens. Crossing the range boundary is only
+ // possible via an overflow (treating 0 as a special case of it, modeling
+ // an overflow as crossing k*2^W for some k).
+ //
+ // The interesting case here is when Min was eliminated as an invalid
+ // solution, but Max was not. The argument is that if there was another
+ // overflow between Min and Max, it would also have been eliminated if
+ // it was considered.
+ //
+ // For a given boundary, it is possible to have two overflows of the same
+ // type (signed/unsigned) without having the other type in between: this
+ // can happen when the vertex of the parabola is between the iterations
+ // corresponding to the overflows. This is only possible when the two
+ // overflows cross k*2^W for the same k. In such case, if the second one
+ // left the range (and was the first one to do so), the first overflow
+ // would have to enter the range, which would mean that either we had left
+ // the range before or that we started outside of it. Both of these cases
+ // are contradictions.
+ //
+ // Claim: In the case where SolveForBoundary returns None, the correct
+ // solution is not some value between the Max for this boundary and the
+ // Min of the other boundary.
+ //
+ // Justification: Assume that we had such Max_A and Min_B corresponding
+ // to range boundaries A and B and such that Max_A < Min_B. If there was
+ // a solution between Max_A and Min_B, it would have to be caused by an
+ // overflow corresponding to either A or B. It cannot correspond to B,
+ // since Min_B is the first occurrence of such an overflow. If it
+ // corresponded to A, it would have to be either a signed or an unsigned
+ // overflow that is larger than both eliminated overflows for A. But
+ // between the eliminated overflows and this overflow, the values would
+ // cover the entire value space, thus crossing the other boundary, which
+ // is a contradiction.
- return std::make_pair(cast<SCEVConstant>(SE.getConstant(Solution1)),
- cast<SCEVConstant>(SE.getConstant(Solution2)));
+ return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
}
ScalarEvolution::ExitLimit
@@ -8441,23 +8650,12 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
// the quadratic equation to solve it.
if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
- if (auto Roots = SolveQuadraticEquation(AddRec, *this)) {
- const SCEVConstant *R1 = Roots->first;
- const SCEVConstant *R2 = Roots->second;
- // Pick the smallest positive root value.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
- CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
- if (!CB->getZExtValue())
- std::swap(R1, R2); // R1 is the minimum root now.
-
- // We can only use this value if the chrec ends up with an exact zero
- // value at this index. When solving for "X*X != 5", for example, we
- // should not accept a root of 2.
- const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
- if (Val->isZero())
- // We found a quadratic root!
- return ExitLimit(R1, R1, false, Predicates);
- }
+ // We can only use this value if the chrec ends up with an exact zero
+ // value at this index. When solving for "X*X != 5", for example, we
+ // should not accept a root of 2.
+ if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
+ const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
+ return ExitLimit(R, R, false, Predicates);
}
return getCouldNotCompute();
}
@@ -8617,7 +8815,13 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
unsigned Depth) {
bool Changed = false;
-
+ // Simplifies ICMP to trivial true or false by turning it into '0 == 0' or
+ // '0 != 0'.
+ auto TrivialCase = [&](bool TriviallyTrue) {
+ LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
+ Pred = TriviallyTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
+ return true;
+ };
// If we hit the max recursion limit bail out.
if (Depth >= 3)
return false;
@@ -8629,9 +8833,9 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
if (ConstantExpr::getICmp(Pred,
LHSC->getValue(),
RHSC->getValue())->isNullValue())
- goto trivially_false;
+ return TrivialCase(false);
else
- goto trivially_true;
+ return TrivialCase(true);
}
// Otherwise swap the operands to put the constant on the right.
std::swap(LHS, RHS);
@@ -8661,9 +8865,9 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
if (!ICmpInst::isEquality(Pred)) {
ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
if (ExactCR.isFullSet())
- goto trivially_true;
+ return TrivialCase(true);
else if (ExactCR.isEmptySet())
- goto trivially_false;
+ return TrivialCase(false);
APInt NewRHS;
CmpInst::Predicate NewPred;
@@ -8699,7 +8903,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// The "Should have been caught earlier!" messages refer to the fact
// that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
// should have fired on the corresponding cases, and canonicalized the
- // check to trivially_true or trivially_false.
+ // check to a trivial case.
case ICmpInst::ICMP_UGE:
assert(!RA.isMinValue() && "Should have been caught earlier!");
@@ -8732,9 +8936,9 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// Check for obvious equality.
if (HasSameValue(LHS, RHS)) {
if (ICmpInst::isTrueWhenEqual(Pred))
- goto trivially_true;
+ return TrivialCase(true);
if (ICmpInst::isFalseWhenEqual(Pred))
- goto trivially_false;
+ return TrivialCase(false);
}
// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
@@ -8802,18 +9006,6 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
return Changed;
-
-trivially_true:
- // Return 0 == 0.
- LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
- Pred = ICmpInst::ICMP_EQ;
- return true;
-
-trivially_false:
- // Return 0 != 0.
- LHS = RHS = getConstant(ConstantInt::getFalse(getContext()));
- Pred = ICmpInst::ICMP_NE;
- return true;
}
bool ScalarEvolution::isKnownNegative(const SCEV *S) {
@@ -9184,6 +9376,11 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return true;
+ if (VerifyIR)
+ assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
+ "This cannot be done on broken IR!");
+
if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
return true;
@@ -9289,6 +9486,10 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return false;
+ if (VerifyIR)
+ assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
+ "This cannot be done on broken IR!");
+
// Both LHS and RHS must be available at loop entry.
assert(isAvailableAtLoopEntry(LHS, L) &&
"LHS is not available at Loop Entry");
@@ -10565,52 +10766,11 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
"Linear scev computation is off in a bad way!");
return SE.getConstant(ExitValue);
- } else if (isQuadratic()) {
- // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the
- // quadratic equation to solve it. To do this, we must frame our problem in
- // terms of figuring out when zero is crossed, instead of when
- // Range.getUpper() is crossed.
- SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
- NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
- const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap);
-
- // Next, solve the constructed addrec
- if (auto Roots =
- SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE)) {
- const SCEVConstant *R1 = Roots->first;
- const SCEVConstant *R2 = Roots->second;
- // Pick the smallest positive root value.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
- ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
- if (!CB->getZExtValue())
- std::swap(R1, R2); // R1 is the minimum root now.
-
- // Make sure the root is not off by one. The returned iteration should
- // not be in the range, but the previous one should be. When solving
- // for "X*X < 5", for example, we should not return a root of 2.
- ConstantInt *R1Val =
- EvaluateConstantChrecAtConstant(this, R1->getValue(), SE);
- if (Range.contains(R1Val->getValue())) {
- // The next iteration must be out of the range...
- ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
-
- R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
- if (!Range.contains(R1Val->getValue()))
- return SE.getConstant(NextVal);
- return SE.getCouldNotCompute(); // Something strange happened
- }
+ }
- // If R1 was not in the range, then it is a good return value. Make
- // sure that R1-1 WAS in the range though, just in case.
- ConstantInt *NextVal =
- ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
- R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
- if (Range.contains(R1Val->getValue()))
- return R1;
- return SE.getCouldNotCompute(); // Something strange happened
- }
- }
+ if (isQuadratic()) {
+ if (auto S = SolveQuadraticAddRecRange(this, Range, SE))
+ return SE.getConstant(S.getValue());
}
return SE.getCouldNotCompute();
@@ -10920,7 +11080,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
// Put larger terms first.
- llvm::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
+ llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
return numberOfTerms(LHS) > numberOfTerms(RHS);
});
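
As a sanity check on the algebra in GetQuadraticEquation above (illustration only, with arbitrary small coefficients; not from the patch): the chrec {L,+,M,+,N} has value L + nM + n(n-1)/2*N after n iterations, and the zero condition rewrites to N*n^2 + (2M-N)*n + 2L = 0 after scaling by 2.

// Numeric spot-check of the quadratic rewrite; coefficients are arbitrary.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t L = 6, M = 3, N = 2; // chrec {6,+,3,+,2}: 6, 9, 14, 21, ...
  auto ChrecAt = [&](int64_t n) { return L + n * M + n * (n - 1) / 2 * N; };
  auto Quad = [&](int64_t n) { return N * n * n + (2 * M - N) * n + 2 * L; };
  for (int64_t n = 0; n < 16; ++n)
    assert(2 * ChrecAt(n) == Quad(n)); // both sides carry the common factor 2
  return 0;
}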
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 7bea994121c8..289d4f8ae49a 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -27,7 +27,7 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
// If either of the memory references is empty, it doesn't matter what the
// pointer values are. This allows the code below to ignore this special
// case.
- if (LocA.Size == 0 || LocB.Size == 0)
+ if (LocA.Size.isZero() || LocB.Size.isZero())
return NoAlias;
// This is SCEVAAResult. Get the SCEVs!
@@ -43,8 +43,12 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
if (SE.getEffectiveSCEVType(AS->getType()) ==
SE.getEffectiveSCEVType(BS->getType())) {
unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
- APInt ASizeInt(BitWidth, LocA.Size);
- APInt BSizeInt(BitWidth, LocB.Size);
+ APInt ASizeInt(BitWidth, LocA.Size.hasValue()
+ ? LocA.Size.getValue()
+ : MemoryLocation::UnknownSize);
+ APInt BSizeInt(BitWidth, LocB.Size.hasValue()
+ ? LocB.Size.getValue()
+ : MemoryLocation::UnknownSize);
// Compute the difference between the two pointers.
const SCEV *BA = SE.getMinusSCEV(BS, AS);
@@ -78,10 +82,10 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
Value *BO = GetBaseValue(BS);
if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
if (alias(MemoryLocation(AO ? AO : LocA.Ptr,
- AO ? +MemoryLocation::UnknownSize : LocA.Size,
+ AO ? LocationSize::unknown() : LocA.Size,
AO ? AAMDNodes() : LocA.AATags),
MemoryLocation(BO ? BO : LocB.Ptr,
- BO ? +MemoryLocation::UnknownSize : LocB.Size,
+ BO ? LocationSize::unknown() : LocB.Size,
BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
return NoAlias;
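
The switch from raw uint64_t sizes to LocationSize above distinguishes a precisely known size from an unknown one. A minimal sketch of the checks the new code relies on, using only the LocationSize members that appear in this diff; the helper names are hypothetical.

// Sketch of the LocationSize-based checks used above.
#include "llvm/Analysis/MemoryLocation.h"
#include <cstdint>
using namespace llvm;

// True only when either location is known to be exactly zero bytes;
// an unknown size deliberately does not count as zero.
static bool eitherLocationIsEmpty(const MemoryLocation &A,
                                  const MemoryLocation &B) {
  return A.Size.isZero() || B.Size.isZero();
}

// Mirrors the APInt construction in SCEVAAResult::alias: fall back to
// MemoryLocation::UnknownSize when the size is not precisely known.
static uint64_t sizeOrUnknown(const MemoryLocation &Loc) {
  return Loc.Size.hasValue() ? Loc.Size.getValue()
                             : MemoryLocation::UnknownSize;
}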
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 8f89389c4b5d..ca5cf1663b83 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1867,7 +1867,7 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
Phis.push_back(&PN);
if (TTI)
- llvm::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
+ llvm::sort(Phis, [](Value *LHS, Value *RHS) {
// Put pointers at the back and make sure pointer < pointer = false.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index f12275aff387..9a581fe46afc 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -95,39 +95,36 @@ AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA,
return AAResultBase::alias(LocA, LocB);
}
-ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
- if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
- LLVMContext::MD_noalias)))
+ if (!mayAliasInScopes(Loc.AATags.Scope,
+ Call->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- if (!mayAliasInScopes(
- CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- Loc.AATags.NoAlias))
+ if (!mayAliasInScopes(Call->getMetadata(LLVMContext::MD_alias_scope),
+ Loc.AATags.NoAlias))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
-ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo ScopedNoAliasAAResult::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
if (!EnableScopedNoAlias)
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
- if (!mayAliasInScopes(
- CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope),
+ Call2->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- if (!mayAliasInScopes(
- CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
- CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ if (!mayAliasInScopes(Call2->getMetadata(LLVMContext::MD_alias_scope),
+ Call1->getMetadata(LLVMContext::MD_noalias)))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
}
static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
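
After the CallSite-to-CallBase migration above, clients query mod/ref information directly with a CallBase pointer. A hypothetical helper over the updated interface, assuming the AAResults entry point takes the same CallBase/MemoryLocation pair shown in the overrides above; this is a sketch, not code from the patch.

// Hypothetical helper over the CallBase-based ModRef query (sketch only).
#include "llvm/Analysis/AliasAnalysis.h"
using namespace llvm;

// Returns true when AA proves the call neither reads nor writes Loc.
static bool callIsTransparentTo(AAResults &AA, const CallBase &Call,
                                const MemoryLocation &Loc) {
  return AA.getModRefInfo(&Call, Loc) == ModRefInfo::NoModRef;
}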
diff --git a/lib/Analysis/StackSafetyAnalysis.cpp b/lib/Analysis/StackSafetyAnalysis.cpp
new file mode 100644
index 000000000000..66b03845864f
--- /dev/null
+++ b/lib/Analysis/StackSafetyAnalysis.cpp
@@ -0,0 +1,673 @@
+//===- StackSafetyAnalysis.cpp - Stack memory safety analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack memory safety analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/StackSafetyAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stack-safety"
+
+static cl::opt<int> StackSafetyMaxIterations("stack-safety-max-iterations",
+ cl::init(20), cl::Hidden);
+
+namespace {
+
+/// Rewrite an SCEV expression for a memory access address to an expression
+/// that represents the offset from the given alloca.
+class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
+ const Value *AllocaPtr;
+
+public:
+ AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
+ : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
+
+ const SCEV *visit(const SCEV *Expr) {
+ // Only rewrite the expression if the alloca is used in an addition
+ // expression (it can be used in other types of expressions if it's cast to
+ // an int and passed as an argument).
+ if (!isa<SCEVAddRecExpr>(Expr) && !isa<SCEVAddExpr>(Expr) &&
+ !isa<SCEVUnknown>(Expr))
+ return Expr;
+ return SCEVRewriteVisitor<AllocaOffsetRewriter>::visit(Expr);
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ // FIXME: look through one or several levels of definitions?
+ // This can be inttoptr(AllocaPtr) and SCEV would not unwrap
+ // it for us.
+ if (Expr->getValue() == AllocaPtr)
+ return SE.getZero(Expr->getType());
+ return Expr;
+ }
+};
+
+/// Describes the use of an address as a function call argument.
+struct PassAsArgInfo {
+ /// Function being called.
+ const GlobalValue *Callee = nullptr;
+ /// Index of the argument which passes the address.
+ size_t ParamNo = 0;
+ // Offset range of the address from the base address (alloca or calling
+ // function argument).
+ // The range should never be set to the empty set; that is an invalid access
+ // range and could cause the empty set to be propagated by ConstantRange::add.
+ ConstantRange Offset;
+ PassAsArgInfo(const GlobalValue *Callee, size_t ParamNo, ConstantRange Offset)
+ : Callee(Callee), ParamNo(ParamNo), Offset(Offset) {}
+
+ StringRef getName() const { return Callee->getName(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const PassAsArgInfo &P) {
+ return OS << "@" << P.getName() << "(arg" << P.ParamNo << ", " << P.Offset
+ << ")";
+}
+
+/// Describes uses of an address (alloca or parameter) inside the function.
+struct UseInfo {
+ // Access range of the address (alloca or parameter).
+ // It is allowed to be the empty set when there are no known accesses.
+ ConstantRange Range;
+
+ // List of calls which pass address as an argument.
+ SmallVector<PassAsArgInfo, 4> Calls;
+
+ explicit UseInfo(unsigned PointerSize) : Range{PointerSize, false} {}
+
+ void updateRange(ConstantRange R) { Range = Range.unionWith(R); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const UseInfo &U) {
+ OS << U.Range;
+ for (auto &Call : U.Calls)
+ OS << ", " << Call;
+ return OS;
+}
+
+struct AllocaInfo {
+ const AllocaInst *AI = nullptr;
+ uint64_t Size = 0;
+ UseInfo Use;
+
+ AllocaInfo(unsigned PointerSize, const AllocaInst *AI, uint64_t Size)
+ : AI(AI), Size(Size), Use(PointerSize) {}
+
+ StringRef getName() const { return AI->getName(); }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const AllocaInfo &A) {
+ return OS << A.getName() << "[" << A.Size << "]: " << A.Use;
+}
+
+struct ParamInfo {
+ const Argument *Arg = nullptr;
+ UseInfo Use;
+
+ explicit ParamInfo(unsigned PointerSize, const Argument *Arg)
+ : Arg(Arg), Use(PointerSize) {}
+
+ StringRef getName() const { return Arg ? Arg->getName() : "<N/A>"; }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const ParamInfo &P) {
+ return OS << P.getName() << "[]: " << P.Use;
+}
+
+/// Calculate the allocation size of a given alloca. Returns 0 if the size
+/// cannot be statically determined.
+uint64_t getStaticAllocaAllocationSize(const AllocaInst *AI) {
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
+ if (AI->isArrayAllocation()) {
+ auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (!C)
+ return 0;
+ Size *= C->getZExtValue();
+ }
+ return Size;
+}
+
+} // end anonymous namespace
+
+/// Describes uses of allocas and parameters inside of a single function.
+struct StackSafetyInfo::FunctionInfo {
+ // May be a Function or a GlobalAlias
+ const GlobalValue *GV = nullptr;
+ // Information about alloca uses.
+ SmallVector<AllocaInfo, 4> Allocas;
+ // Information about parameter uses.
+ SmallVector<ParamInfo, 4> Params;
+ // TODO: describe return value as depending on one or more of its arguments.
+
+ // StackSafetyDataFlowAnalysis counter stored here for faster access.
+ int UpdateCount = 0;
+
+ FunctionInfo(const StackSafetyInfo &SSI) : FunctionInfo(*SSI.Info) {}
+
+ explicit FunctionInfo(const Function *F) : GV(F){};
+ // Creates FunctionInfo that forwards all the parameters to the aliasee.
+ explicit FunctionInfo(const GlobalAlias *A);
+
+ FunctionInfo(FunctionInfo &&) = default;
+
+ bool IsDSOLocal() const { return GV->isDSOLocal(); };
+
+ bool IsInterposable() const { return GV->isInterposable(); };
+
+ StringRef getName() const { return GV->getName(); }
+
+ void print(raw_ostream &O) const {
+ // TODO: Consider different printout format after
+ // StackSafetyDataFlowAnalysis. Calls and parameters are irrelevant then.
+ O << " @" << getName() << (IsDSOLocal() ? "" : " dso_preemptable")
+ << (IsInterposable() ? " interposable" : "") << "\n";
+ O << " args uses:\n";
+ for (auto &P : Params)
+ O << " " << P << "\n";
+ O << " allocas uses:\n";
+ for (auto &AS : Allocas)
+ O << " " << AS << "\n";
+ }
+
+private:
+ FunctionInfo(const FunctionInfo &) = default;
+};
+
+StackSafetyInfo::FunctionInfo::FunctionInfo(const GlobalAlias *A) : GV(A) {
+ unsigned PointerSize = A->getParent()->getDataLayout().getPointerSizeInBits();
+ const GlobalObject *Aliasee = A->getBaseObject();
+ const FunctionType *Type = cast<FunctionType>(Aliasee->getValueType());
+ // 'Forward' all parameters of this alias to the aliasee.
+ for (unsigned ArgNo = 0; ArgNo < Type->getNumParams(); ArgNo++) {
+ Params.emplace_back(PointerSize, nullptr);
+ UseInfo &US = Params.back().Use;
+ US.Calls.emplace_back(Aliasee, ArgNo, ConstantRange(APInt(PointerSize, 0)));
+ }
+}
+
+namespace {
+
+class StackSafetyLocalAnalysis {
+ const Function &F;
+ const DataLayout &DL;
+ ScalarEvolution &SE;
+ unsigned PointerSize = 0;
+
+ const ConstantRange UnknownRange;
+
+ ConstantRange offsetFromAlloca(Value *Addr, const Value *AllocaPtr);
+ ConstantRange getAccessRange(Value *Addr, const Value *AllocaPtr,
+ uint64_t AccessSize);
+ ConstantRange getMemIntrinsicAccessRange(const MemIntrinsic *MI, const Use &U,
+ const Value *AllocaPtr);
+
+ bool analyzeAllUses(const Value *Ptr, UseInfo &AS);
+
+ ConstantRange getRange(uint64_t Lower, uint64_t Upper) const {
+ return ConstantRange(APInt(PointerSize, Lower), APInt(PointerSize, Upper));
+ }
+
+public:
+ StackSafetyLocalAnalysis(const Function &F, ScalarEvolution &SE)
+ : F(F), DL(F.getParent()->getDataLayout()), SE(SE),
+ PointerSize(DL.getPointerSizeInBits()),
+ UnknownRange(PointerSize, true) {}
+
+ // Run the transformation on the associated function.
+ StackSafetyInfo run();
+};
+
+ConstantRange
+StackSafetyLocalAnalysis::offsetFromAlloca(Value *Addr,
+ const Value *AllocaPtr) {
+ if (!SE.isSCEVable(Addr->getType()))
+ return UnknownRange;
+
+ AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
+ ConstantRange Offset = SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize);
+ assert(!Offset.isEmptySet());
+ return Offset;
+}
+
+ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr,
+ const Value *AllocaPtr,
+ uint64_t AccessSize) {
+ if (!SE.isSCEVable(Addr->getType()))
+ return UnknownRange;
+
+ AllocaOffsetRewriter Rewriter(SE, AllocaPtr);
+ const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr));
+
+ ConstantRange AccessStartRange =
+ SE.getUnsignedRange(Expr).zextOrTrunc(PointerSize);
+ ConstantRange SizeRange = getRange(0, AccessSize);
+ ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+ assert(!AccessRange.isEmptySet());
+ return AccessRange;
+}
+
+ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
+ const MemIntrinsic *MI, const Use &U, const Value *AllocaPtr) {
+ if (auto MTI = dyn_cast<MemTransferInst>(MI)) {
+ if (MTI->getRawSource() != U && MTI->getRawDest() != U)
+ return getRange(0, 1);
+ } else {
+ if (MI->getRawDest() != U)
+ return getRange(0, 1);
+ }
+ const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+ // Non-constant size => unsafe. FIXME: try SCEV getRange.
+ if (!Len)
+ return UnknownRange;
+ ConstantRange AccessRange = getAccessRange(U, AllocaPtr, Len->getZExtValue());
+ return AccessRange;
+}
+
+/// The function analyzes all local uses of Ptr (alloca or argument),
+/// calculates the local access range, and records all function calls where
+/// the address is passed.
+bool StackSafetyLocalAnalysis::analyzeAllUses(const Value *Ptr, UseInfo &US) {
+ SmallPtrSet<const Value *, 16> Visited;
+ SmallVector<const Value *, 8> WorkList;
+ WorkList.push_back(Ptr);
+
+ // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+ while (!WorkList.empty()) {
+ const Value *V = WorkList.pop_back_val();
+ for (const Use &UI : V->uses()) {
+ auto I = cast<const Instruction>(UI.getUser());
+ assert(V == UI.get());
+
+ switch (I->getOpcode()) {
+ case Instruction::Load: {
+ US.updateRange(
+ getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType())));
+ break;
+ }
+
+ case Instruction::VAArg:
+ // "va-arg" from a pointer is safe.
+ break;
+ case Instruction::Store: {
+ if (V == I->getOperand(0)) {
+ // Stored the pointer - conservatively assume it may be unsafe.
+ US.updateRange(UnknownRange);
+ return false;
+ }
+ US.updateRange(getAccessRange(
+ UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType())));
+ break;
+ }
+
+ case Instruction::Ret:
+ // Information leak.
+ // FIXME: Process parameters correctly. This is a leak only if we return
+ // the alloca.
+ US.updateRange(UnknownRange);
+ return false;
+
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ ImmutableCallSite CS(I);
+
+ if (I->isLifetimeStartOrEnd())
+ break;
+
+ if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ US.updateRange(getMemIntrinsicAccessRange(MI, UI, Ptr));
+ break;
+ }
+
+ // FIXME: consult devirt?
+ // Do not follow aliases, otherwise we could inadvertently follow
+ // dso_preemptable aliases or aliases with interposable linkage.
+ const GlobalValue *Callee = dyn_cast<GlobalValue>(
+ CS.getCalledValue()->stripPointerCastsNoFollowAliases());
+ if (!Callee) {
+ US.updateRange(UnknownRange);
+ return false;
+ }
+
+ assert(isa<Function>(Callee) || isa<GlobalAlias>(Callee));
+
+ ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator A = B; A != E; ++A) {
+ if (A->get() == V) {
+ ConstantRange Offset = offsetFromAlloca(UI, Ptr);
+ US.Calls.emplace_back(Callee, A - B, Offset);
+ }
+ }
+
+ break;
+ }
+
+ default:
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+ }
+ }
+
+ return true;
+}
+
+StackSafetyInfo StackSafetyLocalAnalysis::run() {
+ StackSafetyInfo::FunctionInfo Info(&F);
+ assert(!F.isDeclaration() &&
+ "Can't run StackSafety on a function declaration");
+
+ LLVM_DEBUG(dbgs() << "[StackSafety] " << F.getName() << "\n");
+
+ for (auto &I : instructions(F)) {
+ if (auto AI = dyn_cast<AllocaInst>(&I)) {
+ Info.Allocas.emplace_back(PointerSize, AI,
+ getStaticAllocaAllocationSize(AI));
+ AllocaInfo &AS = Info.Allocas.back();
+ analyzeAllUses(AI, AS.Use);
+ }
+ }
+
+ for (const Argument &A : make_range(F.arg_begin(), F.arg_end())) {
+ Info.Params.emplace_back(PointerSize, &A);
+ ParamInfo &PS = Info.Params.back();
+ analyzeAllUses(&A, PS.Use);
+ }
+
+ LLVM_DEBUG(dbgs() << "[StackSafety] done\n");
+ LLVM_DEBUG(Info.print(dbgs()));
+ return StackSafetyInfo(std::move(Info));
+}
+
+class StackSafetyDataFlowAnalysis {
+ using FunctionMap =
+ std::map<const GlobalValue *, StackSafetyInfo::FunctionInfo>;
+
+ FunctionMap Functions;
+ // Callee-to-Caller multimap.
+ DenseMap<const GlobalValue *, SmallVector<const GlobalValue *, 4>> Callers;
+ SetVector<const GlobalValue *> WorkList;
+
+ unsigned PointerSize = 0;
+ const ConstantRange UnknownRange;
+
+ ConstantRange getArgumentAccessRange(const GlobalValue *Callee,
+ unsigned ParamNo) const;
+ bool updateOneUse(UseInfo &US, bool UpdateToFullSet);
+ void updateOneNode(const GlobalValue *Callee,
+ StackSafetyInfo::FunctionInfo &FS);
+ void updateOneNode(const GlobalValue *Callee) {
+ updateOneNode(Callee, Functions.find(Callee)->second);
+ }
+ void updateAllNodes() {
+ for (auto &F : Functions)
+ updateOneNode(F.first, F.second);
+ }
+ void runDataFlow();
+ void verifyFixedPoint();
+
+public:
+ StackSafetyDataFlowAnalysis(
+ Module &M, std::function<const StackSafetyInfo &(Function &)> FI);
+ StackSafetyGlobalInfo run();
+};
+
+StackSafetyDataFlowAnalysis::StackSafetyDataFlowAnalysis(
+ Module &M, std::function<const StackSafetyInfo &(Function &)> FI)
+ : PointerSize(M.getDataLayout().getPointerSizeInBits()),
+ UnknownRange(PointerSize, true) {
+ // Without ThinLTO, run the local analysis for every function in the TU and
+ // then run the DFA.
+ for (auto &F : M.functions())
+ if (!F.isDeclaration())
+ Functions.emplace(&F, FI(F));
+ for (auto &A : M.aliases())
+ if (isa<Function>(A.getBaseObject()))
+ Functions.emplace(&A, StackSafetyInfo::FunctionInfo(&A));
+}
+
+ConstantRange
+StackSafetyDataFlowAnalysis::getArgumentAccessRange(const GlobalValue *Callee,
+ unsigned ParamNo) const {
+ auto IT = Functions.find(Callee);
+ // Unknown callee (outside of LTO domain or an indirect call).
+ if (IT == Functions.end())
+ return UnknownRange;
+ const StackSafetyInfo::FunctionInfo &FS = IT->second;
+ // The definition of this symbol may not be the definition in this linkage
+ // unit.
+ if (!FS.IsDSOLocal() || FS.IsInterposable())
+ return UnknownRange;
+ if (ParamNo >= FS.Params.size()) // possibly vararg
+ return UnknownRange;
+ return FS.Params[ParamNo].Use.Range;
+}
+
+bool StackSafetyDataFlowAnalysis::updateOneUse(UseInfo &US,
+ bool UpdateToFullSet) {
+ bool Changed = false;
+ for (auto &CS : US.Calls) {
+ assert(!CS.Offset.isEmptySet() &&
+ "Param range can't be empty-set, invalid offset range");
+
+ ConstantRange CalleeRange = getArgumentAccessRange(CS.Callee, CS.ParamNo);
+ CalleeRange = CalleeRange.add(CS.Offset);
+ if (!US.Range.contains(CalleeRange)) {
+ Changed = true;
+ if (UpdateToFullSet)
+ US.Range = UnknownRange;
+ else
+ US.Range = US.Range.unionWith(CalleeRange);
+ }
+ }
+ return Changed;
+}
+
+void StackSafetyDataFlowAnalysis::updateOneNode(
+ const GlobalValue *Callee, StackSafetyInfo::FunctionInfo &FS) {
+ bool UpdateToFullSet = FS.UpdateCount > StackSafetyMaxIterations;
+ bool Changed = false;
+ for (auto &AS : FS.Allocas)
+ Changed |= updateOneUse(AS.Use, UpdateToFullSet);
+ for (auto &PS : FS.Params)
+ Changed |= updateOneUse(PS.Use, UpdateToFullSet);
+
+ if (Changed) {
+ LLVM_DEBUG(dbgs() << "=== update [" << FS.UpdateCount
+ << (UpdateToFullSet ? ", full-set" : "") << "] "
+ << FS.getName() << "\n");
+ // Callers of this function may need updating.
+ for (auto &CallerID : Callers[Callee])
+ WorkList.insert(CallerID);
+
+ ++FS.UpdateCount;
+ }
+}
+
+void StackSafetyDataFlowAnalysis::runDataFlow() {
+ Callers.clear();
+ WorkList.clear();
+
+ SmallVector<const GlobalValue *, 16> Callees;
+ for (auto &F : Functions) {
+ Callees.clear();
+ StackSafetyInfo::FunctionInfo &FS = F.second;
+ for (auto &AS : FS.Allocas)
+ for (auto &CS : AS.Use.Calls)
+ Callees.push_back(CS.Callee);
+ for (auto &PS : FS.Params)
+ for (auto &CS : PS.Use.Calls)
+ Callees.push_back(CS.Callee);
+
+ llvm::sort(Callees);
+ Callees.erase(std::unique(Callees.begin(), Callees.end()), Callees.end());
+
+ for (auto &Callee : Callees)
+ Callers[Callee].push_back(F.first);
+ }
+
+ updateAllNodes();
+
+ while (!WorkList.empty()) {
+ const GlobalValue *Callee = WorkList.back();
+ WorkList.pop_back();
+ updateOneNode(Callee);
+ }
+}
+
+void StackSafetyDataFlowAnalysis::verifyFixedPoint() {
+ WorkList.clear();
+ updateAllNodes();
+ assert(WorkList.empty());
+}
+
+StackSafetyGlobalInfo StackSafetyDataFlowAnalysis::run() {
+ runDataFlow();
+ LLVM_DEBUG(verifyFixedPoint());
+
+ StackSafetyGlobalInfo SSI;
+ for (auto &F : Functions)
+ SSI.emplace(F.first, std::move(F.second));
+ return SSI;
+}
+
+void print(const StackSafetyGlobalInfo &SSI, raw_ostream &O, const Module &M) {
+ size_t Count = 0;
+ for (auto &F : M.functions())
+ if (!F.isDeclaration()) {
+ SSI.find(&F)->second.print(O);
+ O << "\n";
+ ++Count;
+ }
+ for (auto &A : M.aliases()) {
+ SSI.find(&A)->second.print(O);
+ O << "\n";
+ ++Count;
+ }
+ assert(Count == SSI.size() && "Unexpected functions in the result");
+}
+
+} // end anonymous namespace
+
+StackSafetyInfo::StackSafetyInfo() = default;
+StackSafetyInfo::StackSafetyInfo(StackSafetyInfo &&) = default;
+StackSafetyInfo &StackSafetyInfo::operator=(StackSafetyInfo &&) = default;
+
+StackSafetyInfo::StackSafetyInfo(FunctionInfo &&Info)
+ : Info(new FunctionInfo(std::move(Info))) {}
+
+StackSafetyInfo::~StackSafetyInfo() = default;
+
+void StackSafetyInfo::print(raw_ostream &O) const { Info->print(O); }
+
+AnalysisKey StackSafetyAnalysis::Key;
+
+StackSafetyInfo StackSafetyAnalysis::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ StackSafetyLocalAnalysis SSLA(F, AM.getResult<ScalarEvolutionAnalysis>(F));
+ return SSLA.run();
+}
+
+PreservedAnalyses StackSafetyPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ OS << "'Stack Safety Local Analysis' for function '" << F.getName() << "'\n";
+ AM.getResult<StackSafetyAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
+
+char StackSafetyInfoWrapperPass::ID = 0;
+
+StackSafetyInfoWrapperPass::StackSafetyInfoWrapperPass() : FunctionPass(ID) {
+ initializeStackSafetyInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+void StackSafetyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.setPreservesAll();
+}
+
+void StackSafetyInfoWrapperPass::print(raw_ostream &O, const Module *M) const {
+ SSI.print(O);
+}
+
+bool StackSafetyInfoWrapperPass::runOnFunction(Function &F) {
+ StackSafetyLocalAnalysis SSLA(
+ F, getAnalysis<ScalarEvolutionWrapperPass>().getSE());
+ SSI = StackSafetyInfo(SSLA.run());
+ return false;
+}
+
+AnalysisKey StackSafetyGlobalAnalysis::Key;
+
+StackSafetyGlobalInfo
+StackSafetyGlobalAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ StackSafetyDataFlowAnalysis SSDFA(
+ M, [&FAM](Function &F) -> const StackSafetyInfo & {
+ return FAM.getResult<StackSafetyAnalysis>(F);
+ });
+ return SSDFA.run();
+}
+
+PreservedAnalyses StackSafetyGlobalPrinterPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ OS << "'Stack Safety Analysis' for module '" << M.getName() << "'\n";
+ print(AM.getResult<StackSafetyGlobalAnalysis>(M), OS, M);
+ return PreservedAnalyses::all();
+}
+
+char StackSafetyGlobalInfoWrapperPass::ID = 0;
+
+StackSafetyGlobalInfoWrapperPass::StackSafetyGlobalInfoWrapperPass()
+ : ModulePass(ID) {
+ initializeStackSafetyGlobalInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void StackSafetyGlobalInfoWrapperPass::print(raw_ostream &O,
+ const Module *M) const {
+ ::print(SSI, O, *M);
+}
+
+void StackSafetyGlobalInfoWrapperPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<StackSafetyInfoWrapperPass>();
+}
+
+bool StackSafetyGlobalInfoWrapperPass::runOnModule(Module &M) {
+ StackSafetyDataFlowAnalysis SSDFA(
+ M, [this](Function &F) -> const StackSafetyInfo & {
+ return getAnalysis<StackSafetyInfoWrapperPass>(F).getResult();
+ });
+ SSI = SSDFA.run();
+ return false;
+}
+
+static const char LocalPassArg[] = "stack-safety-local";
+static const char LocalPassName[] = "Stack Safety Local Analysis";
+INITIALIZE_PASS_BEGIN(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName,
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(StackSafetyInfoWrapperPass, LocalPassArg, LocalPassName,
+ false, true)
+
+static const char GlobalPassName[] = "Stack Safety Analysis";
+INITIALIZE_PASS_BEGIN(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE,
+ GlobalPassName, false, false)
+INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass)
+INITIALIZE_PASS_END(StackSafetyGlobalInfoWrapperPass, DEBUG_TYPE,
+ GlobalPassName, false, false)
diff --git a/lib/Analysis/SyncDependenceAnalysis.cpp b/lib/Analysis/SyncDependenceAnalysis.cpp
new file mode 100644
index 000000000000..e1a7e4476d12
--- /dev/null
+++ b/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -0,0 +1,380 @@
+//===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an algorithm that returns, for a divergent branch,
+// the set of basic blocks whose phi nodes become divergent due to divergent
+// control. These are the blocks that are reachable by two disjoint paths from
+// the branch, or loop exits that have a reaching path that is disjoint from
+// a path to the loop latch.
+//
+// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
+// control-induced divergence in phi nodes.
+//
+// -- Summary --
+// The SyncDependenceAnalysis lazily computes sync dependences [3].
+// The analysis evaluates the disjoint path criterion [2] by a reduction
+// to SSA construction. The SSA construction algorithm is implemented as
+// a simple data-flow analysis [1].
+//
+// [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
+// [2] "Efficiently Computing Static Single Assignment Form
+// and the Control Dependence Graph", TOPLAS '91,
+// Cytron, Ferrante, Rosen, Wegman and Zadeck
+// [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
+// [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
+//
+// -- Sync dependence --
+// Sync dependence [4] characterizes the control flow aspect of the
+// propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// -- Reduction to SSA construction --
+// There are two disjoint paths from A to X if a certain variant of SSA
+// construction places a phi node in X under the following set-up scheme [2].
+//
+// This variant of SSA construction ignores incoming undef values.
+// That is, paths from the entry without a definition do not result in
+// phi nodes.
+//
+// entry
+// / \
+// A \
+// / \ Y
+// B C /
+// \ / \ /
+// D E
+// \ /
+// F
+// Assume that A contains a divergent branch. We are interested
+// in the set of all blocks that are reachable from A
+// via two disjoint paths. This would be the set {D, F} in this
+// case.
+// To generally reduce this query to SSA construction we introduce
+// a virtual variable x and assign to x different values in each
+// successor block of A.
+// entry
+// / \
+// A \
+// / \ Y
+// x = 0 x = 1 /
+// \ / \ /
+// D E
+// \ /
+// F
+// Our flavor of SSA construction for x will construct the following
+// entry
+// / \
+// A \
+// / \ Y
+// x0 = 0 x1 = 1 /
+// \ / \ /
+// x2=phi E
+// \ /
+// x3=phi
+// The blocks D and F contain phi nodes and are thus each reachable
+// by two disjoint paths from A.
+//
+// -- Remarks --
+// In case of loop exits we need to check the disjoint path criterion for loops
+// [2]. To this end, we check whether the definition of x differs between the
+// loop exit and the loop header (_after_ SSA construction).
+//
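+// -- Usage sketch --
+// Illustrative only; `isDivergentBranch` and `markDivergentPhis` are
+// placeholders for whatever the client (e.g. the DivergenceAnalysis) uses:
+//
+//   SyncDependenceAnalysis SDA(DT, PDT, LI);
+//   for (const BasicBlock &BB : F)
+//     if (isDivergentBranch(*BB.getTerminator()))
+//       for (const BasicBlock *JoinBB : SDA.join_blocks(*BB.getTerminator()))
+//         markDivergentPhis(*JoinBB);
+//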
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/SyncDependenceAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+
+#include <stack>
+#include <unordered_set>
+
+#define DEBUG_TYPE "sync-dependence"
+
+namespace llvm {
+
+ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
+
+SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
+ const PostDominatorTree &PDT,
+ const LoopInfo &LI)
+ : FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
+
+SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
+
+using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
+
+// divergence propagator for reducible CFGs
+struct DivergencePropagator {
+ const FunctionRPOT &FuncRPOT;
+ const DominatorTree &DT;
+ const PostDominatorTree &PDT;
+ const LoopInfo &LI;
+
+ // identified join points
+ std::unique_ptr<ConstBlockSet> JoinBlocks;
+
+  // reached loop exits (by a path disjoint from a path to the loop header)
+ SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
+
+ // if DefMap[B] == C then C is the dominating definition at block B
+ // if DefMap[B] ~ undef then we haven't seen B yet
+ // if DefMap[B] == B then B is a join point of disjoint paths from X or B is
+ // an immediate successor of X (initial value).
+ using DefiningBlockMap = std::map<const BasicBlock *, const BasicBlock *>;
+ DefiningBlockMap DefMap;
+
+ // all blocks with pending visits
+ std::unordered_set<const BasicBlock *> PendingUpdates;
+
+ DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
+ const PostDominatorTree &PDT, const LoopInfo &LI)
+ : FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
+ JoinBlocks(new ConstBlockSet) {}
+
+ // set the definition at @block and mark @block as pending for a visit
+ void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
+ bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
+ if (WasAdded)
+ PendingUpdates.insert(&Block);
+ }
+
+ void printDefs(raw_ostream &Out) {
+ Out << "Propagator::DefMap {\n";
+ for (const auto *Block : FuncRPOT) {
+ auto It = DefMap.find(Block);
+ Out << Block->getName() << " : ";
+ if (It == DefMap.end()) {
+ Out << "\n";
+ } else {
+ const auto *DefBlock = It->second;
+ Out << (DefBlock ? DefBlock->getName() : "<null>") << "\n";
+ }
+ }
+ Out << "}\n";
+ }
+
+ // process @succBlock with reaching definition @defBlock
+ // the original divergent branch was in @parentLoop (if any)
+ void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
+ const BasicBlock &DefBlock) {
+
+ // @succBlock is a loop exit
+ if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
+ DefMap.emplace(&SuccBlock, &DefBlock);
+ ReachedLoopExits.insert(&SuccBlock);
+ return;
+ }
+
+ // first reaching def?
+ auto ItLastDef = DefMap.find(&SuccBlock);
+ if (ItLastDef == DefMap.end()) {
+ addPending(SuccBlock, DefBlock);
+ return;
+ }
+
+ // a join of at least two definitions
+ if (ItLastDef->second != &DefBlock) {
+ // do we know this join already?
+ if (!JoinBlocks->insert(&SuccBlock).second)
+ return;
+
+ // update the definition
+ addPending(SuccBlock, SuccBlock);
+ }
+ }
+
+ // find all blocks reachable by two disjoint paths from @rootTerm.
+ // This method works for both divergent terminators and loops with
+ // divergent exits.
+ // @rootBlock is either the block containing the branch or the header of the
+ // divergent loop.
+ // @nodeSuccessors is the set of successors of the node (Loop or Terminator)
+ // headed by @rootBlock.
+ // @parentLoop is the parent loop of the Loop or the loop that contains the
+ // Terminator.
+ template <typename SuccessorIterable>
+ std::unique_ptr<ConstBlockSet>
+ computeJoinPoints(const BasicBlock &RootBlock,
+ SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
+ assert(JoinBlocks);
+
+ // immediate post dominator (no join block beyond that block)
+ const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
+ const auto *IpdNode = PdNode->getIDom();
+ const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
+
+ // bootstrap with branch targets
+ for (const auto *SuccBlock : NodeSuccessors) {
+ DefMap.emplace(SuccBlock, SuccBlock);
+
+ if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
+ // immediate loop exit from node.
+ ReachedLoopExits.insert(SuccBlock);
+ continue;
+ } else {
+ // regular successor
+ PendingUpdates.insert(SuccBlock);
+ }
+ }
+
+ auto ItBeginRPO = FuncRPOT.begin();
+
+    // skip until RootBlock (TODO: RPOT won't let us start at RootBlock directly)
+ for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {}
+
+ auto ItEndRPO = FuncRPOT.end();
+ assert(ItBeginRPO != ItEndRPO);
+
+ // propagate definitions at the immediate successors of the node in RPO
+ auto ItBlockRPO = ItBeginRPO;
+ while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
+ const auto *Block = *ItBlockRPO;
+
+ // skip @block if not pending update
+ auto ItPending = PendingUpdates.find(Block);
+ if (ItPending == PendingUpdates.end())
+ continue;
+ PendingUpdates.erase(ItPending);
+
+ // propagate definition at @block to its successors
+ auto ItDef = DefMap.find(Block);
+ const auto *DefBlock = ItDef->second;
+ assert(DefBlock);
+
+ auto *BlockLoop = LI.getLoopFor(Block);
+ if (ParentLoop &&
+ (ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
+        // if the successor is the header of a nested loop, pretend it's a
+        // single node with the loop's exits as successors
+ SmallVector<BasicBlock *, 4> BlockLoopExits;
+ BlockLoop->getExitBlocks(BlockLoopExits);
+ for (const auto *BlockLoopExit : BlockLoopExits) {
+ visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock);
+ }
+
+ } else {
+ // the successors are either on the same loop level or loop exits
+ for (const auto *SuccBlock : successors(Block)) {
+ visitSuccessor(*SuccBlock, ParentLoop, *DefBlock);
+ }
+ }
+ }
+
+ // We need to know the definition at the parent loop header to decide
+ // whether the definition at the header is different from the definition at
+    // the loop exits, which would indicate a divergent loop exit.
+ //
+ // A // loop header
+ // |
+ // B // nested loop header
+ // |
+ // C -> X (exit from B loop) -..-> (A latch)
+ // |
+ // D -> back to B (B latch)
+ // |
+ // proper exit from both loops
+ //
+ // D post-dominates B as it is the only proper exit from the "A loop".
+ // If C has a divergent branch, propagation will therefore stop at D.
+ // That implies that B will never receive a definition.
+    // But that definition can only be the same as at D (D itself in this case)
+    // because all paths back to B have to pass through D.
+ //
+ const BasicBlock *ParentLoopHeader =
+ ParentLoop ? ParentLoop->getHeader() : nullptr;
+ if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
+ DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
+ }
+
+ // analyze reached loop exits
+ if (!ReachedLoopExits.empty()) {
+ assert(ParentLoop);
+ const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
+ LLVM_DEBUG(printDefs(dbgs()));
+ assert(HeaderDefBlock && "no definition in header of carrying loop");
+
+ for (const auto *ExitBlock : ReachedLoopExits) {
+ auto ItExitDef = DefMap.find(ExitBlock);
+ assert((ItExitDef != DefMap.end()) &&
+ "no reaching def at reachable loop exit");
+ if (ItExitDef->second != HeaderDefBlock) {
+ JoinBlocks->insert(ExitBlock);
+ }
+ }
+ }
+
+ return std::move(JoinBlocks);
+ }
+};
+
+const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
+ using LoopExitVec = SmallVector<BasicBlock *, 4>;
+ LoopExitVec LoopExits;
+ Loop.getExitBlocks(LoopExits);
+ if (LoopExits.size() < 1) {
+ return EmptyBlockSet;
+ }
+
+ // already available in cache?
+ auto ItCached = CachedLoopExitJoins.find(&Loop);
+ if (ItCached != CachedLoopExitJoins.end())
+ return *ItCached->second;
+
+ // compute all join points
+ DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
+ auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
+ *Loop.getHeader(), LoopExits, Loop.getParentLoop());
+
+ auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
+ assert(ItInserted.second);
+ return *ItInserted.first->second;
+}
+
+const ConstBlockSet &
+SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
+ // trivial case
+ if (Term.getNumSuccessors() < 1) {
+ return EmptyBlockSet;
+ }
+
+ // already available in cache?
+ auto ItCached = CachedBranchJoins.find(&Term);
+ if (ItCached != CachedBranchJoins.end())
+ return *ItCached->second;
+
+ // compute all join points
+ DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
+ const auto &TermBlock = *Term.getParent();
+ auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
+ TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
+
+ auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
+ assert(ItInserted.second);
+ return *ItInserted.first->second;
+}
+
+} // namespace llvm
diff --git a/lib/Analysis/SyntheticCountsUtils.cpp b/lib/Analysis/SyntheticCountsUtils.cpp
index b085fa274d7f..c2d7bb11a4cf 100644
--- a/lib/Analysis/SyntheticCountsUtils.cpp
+++ b/lib/Analysis/SyntheticCountsUtils.cpp
@@ -14,22 +14,21 @@
#include "llvm/Analysis/SyntheticCountsUtils.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
using namespace llvm;
// Given an SCC, propagate entry counts along the edge of the SCC nodes.
template <typename CallGraphType>
void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
- const SccTy &SCC, GetRelBBFreqTy GetRelBBFreq, GetCountTy GetCount,
- AddCountTy AddCount) {
+ const SccTy &SCC, GetProfCountTy GetProfCount, AddCountTy AddCount) {
- SmallPtrSet<NodeRef, 8> SCCNodes;
+ DenseSet<NodeRef> SCCNodes;
SmallVector<std::pair<NodeRef, EdgeRef>, 8> SCCEdges, NonSCCEdges;
for (auto &Node : SCC)
@@ -54,17 +53,13 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
// This ensures that the order of
// traversal of nodes within the SCC doesn't affect the final result.
- DenseMap<NodeRef, uint64_t> AdditionalCounts;
+ DenseMap<NodeRef, Scaled64> AdditionalCounts;
for (auto &E : SCCEdges) {
- auto OptRelFreq = GetRelBBFreq(E.second);
- if (!OptRelFreq)
+ auto OptProfCount = GetProfCount(E.first, E.second);
+ if (!OptProfCount)
continue;
- Scaled64 RelFreq = OptRelFreq.getValue();
- auto Caller = E.first;
auto Callee = CGT::edge_dest(E.second);
- RelFreq *= Scaled64(GetCount(Caller), 0);
- uint64_t AdditionalCount = RelFreq.toInt<uint64_t>();
- AdditionalCounts[Callee] += AdditionalCount;
+ AdditionalCounts[Callee] += OptProfCount.getValue();
}
// Update the counts for the nodes in the SCC.
@@ -73,14 +68,11 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
// Now update the counts for nodes outside the SCC.
for (auto &E : NonSCCEdges) {
- auto OptRelFreq = GetRelBBFreq(E.second);
- if (!OptRelFreq)
+ auto OptProfCount = GetProfCount(E.first, E.second);
+ if (!OptProfCount)
continue;
- Scaled64 RelFreq = OptRelFreq.getValue();
- auto Caller = E.first;
auto Callee = CGT::edge_dest(E.second);
- RelFreq *= Scaled64(GetCount(Caller), 0);
- AddCount(Callee, RelFreq.toInt<uint64_t>());
+ AddCount(Callee, OptProfCount.getValue());
}
}
@@ -94,8 +86,7 @@ void SyntheticCountsUtils<CallGraphType>::propagateFromSCC(
template <typename CallGraphType>
void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
- GetRelBBFreqTy GetRelBBFreq,
- GetCountTy GetCount,
+ GetProfCountTy GetProfCount,
AddCountTy AddCount) {
std::vector<SccTy> SCCs;
@@ -107,7 +98,8 @@ void SyntheticCountsUtils<CallGraphType>::propagate(const CallGraphType &CG,
// The scc iterator returns the scc in bottom-up order, so reverse the SCCs
// and call propagateFromSCC.
for (auto &SCC : reverse(SCCs))
- propagateFromSCC(SCC, GetRelBBFreq, GetCount, AddCount);
+ propagateFromSCC(SCC, GetProfCount, AddCount);
}
template class llvm::SyntheticCountsUtils<const CallGraph *>;
+template class llvm::SyntheticCountsUtils<ModuleSummaryIndex *>;
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 102135fbf313..4643f75da42d 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -413,17 +413,17 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_flsll);
}
- // The following functions are available on Linux,
- // but Android uses bionic instead of glibc.
- if (!T.isOSLinux() || T.isAndroid()) {
+ // The following functions are only available on GNU/Linux (using glibc).
+  // Linux variants without glibc (e.g. bionic, musl) may have some subset.
+ if (!T.isOSLinux() || !T.isGNUEnvironment()) {
TLI.setUnavailable(LibFunc_dunder_strdup);
TLI.setUnavailable(LibFunc_dunder_strtok_r);
TLI.setUnavailable(LibFunc_dunder_isoc99_scanf);
TLI.setUnavailable(LibFunc_dunder_isoc99_sscanf);
TLI.setUnavailable(LibFunc_under_IO_getc);
TLI.setUnavailable(LibFunc_under_IO_putc);
- // But, Android has memalign.
- if (!T.isAndroid())
+ // But, Android and musl have memalign.
+ if (!T.isAndroid() && !T.isMusl())
TLI.setUnavailable(LibFunc_memalign);
TLI.setUnavailable(LibFunc_fopen64);
TLI.setUnavailable(LibFunc_fseeko64);
@@ -613,6 +613,24 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
unsigned NumParams = FTy.getNumParams();
switch (F) {
+ case LibFunc_execl:
+ case LibFunc_execlp:
+ case LibFunc_execle:
+ return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
+ case LibFunc_execv:
+ case LibFunc_execvp:
+ return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
+ case LibFunc_execvP:
+ case LibFunc_execvpe:
+ case LibFunc_execve:
+ return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getParamType(2)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32));
case LibFunc_strlen:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy());
@@ -863,6 +881,8 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
+ case LibFunc_fork:
+ return (NumParams == 0 && FTy.getReturnType()->isIntegerTy(32));
case LibFunc_fdopen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
@@ -1399,10 +1419,10 @@ static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end());
- llvm::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName);
+ llvm::sort(VectorDescs, compareByScalarFnName);
ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end());
- llvm::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
+ llvm::sort(ScalarDescs, compareByVectorFnName);
}
void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 7233a86e5daf..9151d46c6cce 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -268,6 +268,10 @@ bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
return TTIImpl->enableInterleavedAccessVectorization();
}
+bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
+ return TTIImpl->enableMaskedInterleavedAccessVectorization();
+}
+
bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
@@ -384,6 +388,55 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
+TargetTransformInfo::OperandValueKind
+TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
+ OperandValueKind OpInfo = OK_AnyValue;
+ OpProps = OP_None;
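+  // This classification lets cost models special-case cheap operands; for
+  // example (illustrative), a splat of the constant 8 is reported as
+  // OK_UniformConstantValue with OP_PowerOf2, so a multiply by it can be
+  // costed like a shift.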
+
+ if (auto *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getValue().isPowerOf2())
+ OpProps = OP_PowerOf2;
+ return OK_UniformConstantValue;
+ }
+
+ // A broadcast shuffle creates a uniform value.
+ // TODO: Add support for non-zero index broadcasts.
+ // TODO: Add support for different source vector width.
+ if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
+ if (ShuffleInst->isZeroEltSplat())
+ OpInfo = OK_UniformValue;
+
+ const Value *Splat = getSplatValue(V);
+
+  // Check for a splat of a constant or for a non-uniform vector of constants
+ // and check if the constant(s) are all powers of two.
+ if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
+ OpInfo = OK_NonUniformConstantValue;
+ if (Splat) {
+ OpInfo = OK_UniformConstantValue;
+ if (auto *CI = dyn_cast<ConstantInt>(Splat))
+ if (CI->getValue().isPowerOf2())
+ OpProps = OP_PowerOf2;
+ } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ OpProps = OP_PowerOf2;
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
+ if (CI->getValue().isPowerOf2())
+ continue;
+ OpProps = OP_None;
+ break;
+ }
+ }
+ }
+
+  // Check for a splat of a uniform value. This is not loop aware, so classify
+  // the operand as uniform only for the obviously uniform cases (argument,
+  // global value).
+ if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
+ OpInfo = OK_UniformValue;
+
+ return OpInfo;
+}
+
int TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
@@ -472,9 +525,12 @@ int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
int TargetTransformInfo::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) const {
+ unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
+ bool UseMaskForGaps) const {
int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace);
+ Alignment, AddressSpace,
+ UseMaskForCond,
+ UseMaskForGaps);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -569,6 +625,12 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
+bool TargetTransformInfo::areFunctionArgsABICompatible(
+ const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const {
+ return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
+}
+
bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
Type *Ty) const {
return TTIImpl->isIndexedLoadLegal(Mode, Ty);
@@ -630,49 +692,6 @@ int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
return TTIImpl->getInstructionLatency(I);
}
-static TargetTransformInfo::OperandValueKind
-getOperandInfo(Value *V, TargetTransformInfo::OperandValueProperties &OpProps) {
- TargetTransformInfo::OperandValueKind OpInfo =
- TargetTransformInfo::OK_AnyValue;
- OpProps = TargetTransformInfo::OP_None;
-
- if (auto *CI = dyn_cast<ConstantInt>(V)) {
- if (CI->getValue().isPowerOf2())
- OpProps = TargetTransformInfo::OP_PowerOf2;
- return TargetTransformInfo::OK_UniformConstantValue;
- }
-
- const Value *Splat = getSplatValue(V);
-
- // Check for a splat of a constant or for a non uniform vector of constants
- // and check if the constant(s) are all powers of two.
- if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
- OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
- if (Splat) {
- OpInfo = TargetTransformInfo::OK_UniformConstantValue;
- if (auto *CI = dyn_cast<ConstantInt>(Splat))
- if (CI->getValue().isPowerOf2())
- OpProps = TargetTransformInfo::OP_PowerOf2;
- } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
- OpProps = TargetTransformInfo::OP_PowerOf2;
- for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
- if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
- if (CI->getValue().isPowerOf2())
- continue;
- OpProps = TargetTransformInfo::OP_None;
- break;
- }
- }
- }
-
- // Check for a splat of a uniform value. This is not loop aware, so return
- // true only for the obviously uniform cases (argument, globalvalue)
- if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
- OpInfo = TargetTransformInfo::OK_UniformValue;
-
- return OpInfo;
-}
-
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
unsigned Level) {
// We don't need a shuffle if we just want to have element 0 in position 0 of
@@ -1101,14 +1120,20 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
}
case Instruction::ShuffleVector: {
const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- // TODO: Identify and add costs for insert/extract subvector, etc.
+ Type *Ty = Shuffle->getType();
+ Type *SrcTy = Shuffle->getOperand(0)->getType();
+
+ // TODO: Identify and add costs for insert subvector, etc.
+ int SubIndex;
+ if (Shuffle->isExtractSubvectorMask(SubIndex))
+ return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);
+
if (Shuffle->changesLength())
return -1;
if (Shuffle->isIdentity())
return 0;
- Type *Ty = Shuffle->getType();
if (Shuffle->isReverse())
return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 25a154edf4ac..83974da30a54 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -399,20 +399,20 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
}
FunctionModRefBehavior
-TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
+TypeBasedAAResult::getModRefBehavior(const CallBase *Call) {
if (!EnableTBAA)
- return AAResultBase::getModRefBehavior(CS);
+ return AAResultBase::getModRefBehavior(Call);
FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
- if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
(isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
Min = FMRB_OnlyReadsMemory;
- return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+ return FunctionModRefBehavior(AAResultBase::getModRefBehavior(Call) & Min);
}
FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
@@ -420,33 +420,30 @@ FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
return AAResultBase::getModRefBehavior(F);
}
-ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
+ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
if (const MDNode *L = Loc.AATags.TBAA)
- if (const MDNode *M =
- CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS, Loc);
+ return AAResultBase::getModRefInfo(Call, Loc);
}
-ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) {
+ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1,
+ const CallBase *Call2) {
if (!EnableTBAA)
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
- if (const MDNode *M1 =
- CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if (const MDNode *M2 =
- CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M1 = Call1->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 = Call2->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
return ModRefInfo::NoModRef;
- return AAResultBase::getModRefInfo(CS1, CS2);
+ return AAResultBase::getModRefInfo(Call1, Call2);
}
bool MDNode::isTBAAVtableAccess() const {
diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp
index 6871e4887c9e..bd13a43b8d46 100644
--- a/lib/Analysis/TypeMetadataUtils.cpp
+++ b/lib/Analysis/TypeMetadataUtils.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
@@ -22,11 +23,21 @@ using namespace llvm;
// Search for virtual calls that call FPtr and add them to DevirtCalls.
static void
findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- bool *HasNonCallUses, Value *FPtr, uint64_t Offset) {
+ bool *HasNonCallUses, Value *FPtr, uint64_t Offset,
+ const CallInst *CI, DominatorTree &DT) {
for (const Use &U : FPtr->uses()) {
- Value *User = U.getUser();
+ Instruction *User = cast<Instruction>(U.getUser());
+ // Ignore this instruction if it is not dominated by the type intrinsic
+ // being analyzed. Otherwise we may transform a call sharing the same
+ // vtable pointer incorrectly. Specifically, this situation can arise
+ // after indirect call promotion and inlining, where we may have uses
+ // of the vtable pointer guarded by a function pointer check, and a fallback
+ // indirect call.
+ if (!DT.dominates(CI, User))
+ continue;
if (isa<BitCastInst>(User)) {
- findCallsAtConstantOffset(DevirtCalls, HasNonCallUses, User, Offset);
+ findCallsAtConstantOffset(DevirtCalls, HasNonCallUses, User, Offset, CI,
+ DT);
} else if (auto CI = dyn_cast<CallInst>(User)) {
DevirtCalls.push_back({Offset, CI});
} else if (auto II = dyn_cast<InvokeInst>(User)) {
@@ -38,23 +49,23 @@ findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls,
}
// Search for virtual calls that load from VPtr and add them to DevirtCalls.
-static void
-findLoadCallsAtConstantOffset(const Module *M,
- SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- Value *VPtr, int64_t Offset) {
+static void findLoadCallsAtConstantOffset(
+ const Module *M, SmallVectorImpl<DevirtCallSite> &DevirtCalls, Value *VPtr,
+ int64_t Offset, const CallInst *CI, DominatorTree &DT) {
for (const Use &U : VPtr->uses()) {
Value *User = U.getUser();
if (isa<BitCastInst>(User)) {
- findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset);
+ findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset, CI, DT);
} else if (isa<LoadInst>(User)) {
- findCallsAtConstantOffset(DevirtCalls, nullptr, User, Offset);
+ findCallsAtConstantOffset(DevirtCalls, nullptr, User, Offset, CI, DT);
} else if (auto GEP = dyn_cast<GetElementPtrInst>(User)) {
// Take into account the GEP offset.
if (VPtr == GEP->getPointerOperand() && GEP->hasAllConstantIndices()) {
SmallVector<Value *, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
int64_t GEPOffset = M->getDataLayout().getIndexedOffsetInType(
GEP->getSourceElementType(), Indices);
- findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset + GEPOffset);
+ findLoadCallsAtConstantOffset(M, DevirtCalls, User, Offset + GEPOffset,
+ CI, DT);
}
}
}
@@ -62,7 +73,8 @@ findLoadCallsAtConstantOffset(const Module *M,
void llvm::findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI) {
+ SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI,
+ DominatorTree &DT) {
assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test);
const Module *M = CI->getParent()->getParent()->getParent();
@@ -79,15 +91,15 @@ void llvm::findDevirtualizableCallsForTypeTest(
// If we found any, search for virtual calls based on %p and add them to
// DevirtCalls.
if (!Assumes.empty())
- findLoadCallsAtConstantOffset(M, DevirtCalls,
- CI->getArgOperand(0)->stripPointerCasts(), 0);
+ findLoadCallsAtConstantOffset(
+ M, DevirtCalls, CI->getArgOperand(0)->stripPointerCasts(), 0, CI, DT);
}
void llvm::findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<Instruction *> &LoadedPtrs,
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
- const CallInst *CI) {
+ const CallInst *CI, DominatorTree &DT) {
assert(CI->getCalledFunction()->getIntrinsicID() ==
Intrinsic::type_checked_load);
@@ -114,5 +126,5 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
for (Value *LoadedPtr : LoadedPtrs)
findCallsAtConstantOffset(DevirtCalls, &HasNonCallUses, LoadedPtr,
- Offset->getZExtValue());
+ Offset->getZExtValue(), CI, DT);
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 0ef39163bda3..0446426c0e66 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -118,14 +119,18 @@ struct Query {
/// (all of which can call computeKnownBits), and so on.
std::array<const Value *, MaxDepth> Excluded;
+  /// Controls (via UseInstrInfo) whether it is safe to use instruction
+  /// metadata and IR flags such as nsw/nuw during the analysis.
+ InstrInfoQuery IIQ;
+
unsigned NumExcluded = 0;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
- : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE) {}
+ const DominatorTree *DT, bool UseInstrInfo,
+ OptimizationRemarkEmitter *ORE = nullptr)
+ : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
Query(const Query &Q, const Value *NewExcl)
- : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
+ : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ),
NumExcluded(Q.NumExcluded) {
Excluded = Q.Excluded;
Excluded[NumExcluded++] = NewExcl;
@@ -165,9 +170,9 @@ void llvm::computeKnownBits(const Value *V, KnownBits &Known,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
::computeKnownBits(V, Known, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
+ Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}
static KnownBits computeKnownBits(const Value *V, unsigned Depth,
@@ -177,15 +182,16 @@ KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE) {
- return ::computeKnownBits(V, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
+ OptimizationRemarkEmitter *ORE,
+ bool UseInstrInfo) {
+ return ::computeKnownBits(
+ V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
}
bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
assert(LHS->getType() == RHS->getType() &&
"LHS and RHS should have the same type");
assert(LHS->getType()->isIntOrIntVectorTy() &&
@@ -201,8 +207,8 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
KnownBits LHSKnown(IT->getBitWidth());
KnownBits RHSKnown(IT->getBitWidth());
- computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT);
- computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT);
+ computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
+ computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
@@ -222,69 +228,71 @@ static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q);
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
- bool OrZero,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
+ bool OrZero, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::isKnownToBeAPowerOfTwo(
+ V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::isKnownNonZero(V, Depth,
+ Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
- unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ KnownBits Known =
+ computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
return Known.isNonNegative();
}
bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+ const DominatorTree *DT, bool UseInstrInfo) {
if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isStrictlyPositive();
  // TODO: We're doing two recursive queries here. We should factor this such
// that only a single query is needed.
- return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) &&
- isKnownNonZero(V, DL, Depth, AC, CxtI, DT);
+ return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) &&
+ isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
}
bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
+ const DominatorTree *DT, bool UseInstrInfo) {
+ KnownBits Known =
+ computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
return Known.isNegative();
}
static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
- const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isKnownNonEqual(V1, V2, Query(DL, AC,
- safeCxtI(V1, safeCxtI(V2, CxtI)),
- DT));
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ return ::isKnownNonEqual(V1, V2,
+ Query(DL, AC, safeCxtI(V1, safeCxtI(V2, CxtI)), DT,
+ UseInstrInfo, /*ORE=*/nullptr));
}
static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q);
bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
- const DataLayout &DL,
- unsigned Depth, AssumptionCache *AC,
- const Instruction *CxtI, const DominatorTree *DT) {
- return ::MaskedValueIsZero(V, Mask, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::MaskedValueIsZero(
+ V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
@@ -293,8 +301,9 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::ComputeNumSignBits(
+ V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
@@ -965,7 +974,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
switch (I->getOpcode()) {
default: break;
case Instruction::Load:
- if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
+ if (MDNode *MD =
+ Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, Known);
break;
case Instruction::And: {
@@ -1014,7 +1024,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
break;
}
case Instruction::Mul: {
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
Known2, Depth, Q);
break;
@@ -1082,7 +1092,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// RHS from matchSelectPattern returns the negation part of abs pattern.
// If the negate has an NSW flag we can assume the sign bit of the result
// will be 0 because that makes abs(INT_MIN) undefined.
- if (cast<Instruction>(RHS)->hasNoSignedWrap())
+ if (Q.IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
MaxHighZeros = 1;
}
@@ -1151,7 +1161,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
}
case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
APInt KZResult = KnownZero << ShiftAmt;
KZResult.setLowBits(ShiftAmt); // Low bits known 0.
@@ -1202,13 +1212,13 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
break;
}
case Instruction::Sub: {
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
Known, Known2, Depth, Q);
break;
}
case Instruction::Add: {
- bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
Known, Known2, Depth, Q);
break;
@@ -1369,7 +1379,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
Known3.countMinTrailingZeros()));
auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
- if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
+ if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
// If initial value of recurrence is nonnegative, and we are adding
// a nonnegative number with nsw, the result can only be nonnegative
// or poison value regardless of the number of times we execute the
@@ -1442,7 +1452,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// If range metadata is attached to this call, set known bits from that,
// and then intersect with known bits based on other properties of the
// function.
- if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
+ if (MDNode *MD =
+ Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, Known);
if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
computeKnownBits(RV, Known2, Depth + 1, Q);
@@ -1495,6 +1506,27 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
// of bits which might be set provided by popcnt KnownOne2.
break;
}
+ case Intrinsic::fshr:
+ case Intrinsic::fshl: {
+ const APInt *SA;
+ if (!match(I->getOperand(2), m_APInt(SA)))
+ break;
+
+ // Normalize to funnel shift left.
+ uint64_t ShiftAmt = SA->urem(BitWidth);
+ if (II->getIntrinsicID() == Intrinsic::fshr)
+ ShiftAmt = BitWidth - ShiftAmt;
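+      // e.g. on i8, fshr(X, Y, 3) becomes a funnel shift left by 5: the
+      // result combines the bits of X shifted left by 5 with the bits of Y
+      // shifted right by 3, matching the known-bits math below.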
+
+ KnownBits Known3(Known);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);
+
+ Known.Zero =
+ Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
+ Known.One =
+ Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
+ break;
+ }
case Intrinsic::x86_sse42_crc32_64_64:
Known.Zero.setBitsFrom(32);
break;
@@ -1722,7 +1754,8 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
// either the original power-of-two, a larger power-of-two or zero.
if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
- if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
+ if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
+ Q.IIQ.hasNoSignedWrap(VOBO)) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
@@ -1860,19 +1893,41 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
(Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
continue;
+ SmallVector<const User *, 4> WorkList;
+ SmallPtrSet<const User *, 4> Visited;
for (auto *CmpU : U->users()) {
- if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) {
- assert(BI->isConditional() && "uses a comparison!");
+ assert(WorkList.empty() && "Should be!");
+ if (Visited.insert(CmpU).second)
+ WorkList.push_back(CmpU);
+
+ while (!WorkList.empty()) {
+ auto *Curr = WorkList.pop_back_val();
+
+          // If a user is an AND, add all its users to the work list. We only
+          // propagate the "pred != null" condition through AND because it is
+          // only correct to assume that all conditions of an AND are met on
+          // the true branch.
+          // TODO: Support similar logic for OR and the EQ predicate?
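+          // e.g. (illustrative IR) for "%c = and i1 %p.nonnull, %q" followed
+          // by "br i1 %c, ...", taking the true edge guarantees %p.nonnull,
+          // so the branch on %c still witnesses that the pointer is non-null.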
+ if (Pred == ICmpInst::ICMP_NE)
+ if (auto *BO = dyn_cast<BinaryOperator>(Curr))
+ if (BO->getOpcode() == Instruction::And) {
+ for (auto *BOU : BO->users())
+ if (Visited.insert(BOU).second)
+ WorkList.push_back(BOU);
+ continue;
+ }
- BasicBlock *NonNullSuccessor =
- BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
- BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
- if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
+ assert(BI->isConditional() && "uses a comparison!");
+
+ BasicBlock *NonNullSuccessor =
+ BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
+ BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
+ if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ return true;
+ } else if (Pred == ICmpInst::ICMP_NE && isGuard(Curr) &&
+ DT->dominates(cast<Instruction>(Curr), CtxI)) {
return true;
- } else if (Pred == ICmpInst::ICMP_NE &&
- match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) &&
- DT->dominates(cast<Instruction>(CmpU), CtxI)) {
- return true;
+ }
}
}
}
@@ -1937,7 +1992,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
if (auto *I = dyn_cast<Instruction>(V)) {
- if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) {
+ if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
// If the possible ranges don't contain zero, then the value is
// definitely non-zero.
if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
@@ -1965,13 +2020,13 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// A Load tagged with nonnull metadata is never null.
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
- if (LI->getMetadata(LLVMContext::MD_nonnull))
+ if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull))
return true;
- if (auto CS = ImmutableCallSite(V)) {
- if (CS.isReturnNonNull())
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
+ if (Call->isReturnNonNull())
return true;
- if (const auto *RP = getArgumentAliasingToReturnedPointer(CS))
+ if (const auto *RP = getArgumentAliasingToReturnedPointer(Call))
return isKnownNonZero(RP, Depth, Q);
}
}
@@ -2003,7 +2058,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
// shl nuw can't remove any non-zero bits.
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
- if (BO->hasNoUnsignedWrap())
+ if (Q.IIQ.hasNoUnsignedWrap(BO))
return isKnownNonZero(X, Depth, Q);
KnownBits Known(BitWidth);
@@ -2078,7 +2133,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
- if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
+ if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) &&
isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q))
return true;
}
@@ -2100,7 +2155,8 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
if (!C->isZero() && !C->isNegative()) {
ConstantInt *X;
- if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
+ if (Q.IIQ.UseInstrInfo &&
+ (match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
!X->isNegative())
return true;
@@ -2174,6 +2230,36 @@ bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
return Mask.isSubsetOf(Known.Zero);
}
+// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
+// Returns the input and lower/upper bounds.
+static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
+ const APInt *&CLow, const APInt *&CHigh) {
+ assert(isa<Operator>(Select) &&
+ cast<Operator>(Select)->getOpcode() == Instruction::Select &&
+ "Input should be a Select!");
+
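+  // Illustrative example: for a clamp like smax(smin(%x, 41), -17) this sets
+  // In = %x, CLow = -17, CHigh = 41; ComputeNumSignBits then uses
+  // min(signbits(CLow), signbits(CHigh)) as a lower bound for the select.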
+ const Value *LHS, *RHS, *LHS2, *RHS2;
+ SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
+ if (SPF != SPF_SMAX && SPF != SPF_SMIN)
+ return false;
+
+ if (!match(RHS, m_APInt(CLow)))
+ return false;
+
+ SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
+ if (getInverseMinMaxFlavor(SPF) != SPF2)
+ return false;
+
+ if (!match(RHS2, m_APInt(CHigh)))
+ return false;
+
+ if (SPF == SPF_SMIN)
+ std::swap(CLow, CHigh);
+
+ In = LHS2;
+ return CLow->sle(*CHigh);
+}
+
/// For vector constants, loop over the elements and find the constant with the
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
@@ -2335,11 +2421,19 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
}
break;
- case Instruction::Select:
+ case Instruction::Select: {
+ // If we have a clamp pattern, we know that the number of sign bits will be
+ // the minimum of the clamp min/max range.
+ const Value *X;
+ const APInt *CLow, *CHigh;
+ if (isSignedMinMaxClamp(U, X, CLow, CHigh))
+ return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
+
Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
if (Tmp == 1) break;
Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
return std::min(Tmp, Tmp2);
+ }
case Instruction::Add:
// Add can have at most one carry bit. Thus we know that the output
@@ -2437,6 +2531,44 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// valid for all elements of the vector (for example if vector is sign
// extended, shifted, etc).
return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+
+ case Instruction::ShuffleVector: {
+ // TODO: This is copied almost directly from the SelectionDAG version of
+ // ComputeNumSignBits. It would be better if we could share common
+ // code. If not, make sure that changes are translated to the DAG.
+
+ // Collect the minimum number of sign bits that are shared by every vector
+ // element referenced by the shuffle.
+ auto *Shuf = cast<ShuffleVectorInst>(U);
+ int NumElts = Shuf->getOperand(0)->getType()->getVectorNumElements();
+ int NumMaskElts = Shuf->getMask()->getType()->getVectorNumElements();
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ for (int i = 0; i != NumMaskElts; ++i) {
+ int M = Shuf->getMaskValue(i);
+ assert(M < NumElts * 2 && "Invalid shuffle mask constant");
+ // For undef elements, we don't know anything about the common state of
+ // the shuffle result.
+ if (M == -1)
+ return 1;
+ if (M < NumElts)
+ DemandedLHS.setBit(M % NumElts);
+ else
+ DemandedRHS.setBit(M % NumElts);
+ }
+ Tmp = std::numeric_limits<unsigned>::max();
+ if (!!DemandedLHS)
+ Tmp = ComputeNumSignBits(Shuf->getOperand(0), Depth + 1, Q);
+ if (!!DemandedRHS) {
+ Tmp2 = ComputeNumSignBits(Shuf->getOperand(1), Depth + 1, Q);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ // If we don't know anything, early out and try computeKnownBits fall-back.
+ if (Tmp == 1)
+ break;
+ assert(Tmp <= V->getType()->getScalarSizeInBits() &&
+ "Failed to determine minimum sign bits");
+ return Tmp;
+ }
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -2722,6 +2854,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
break;
// sqrt(-0.0) = -0.0, no other negative results are possible.
case Intrinsic::sqrt:
+ case Intrinsic::canonicalize:
return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
// fabs(x) != -0.0
case Intrinsic::fabs:
@@ -2817,11 +2950,20 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
default:
break;
case Intrinsic::maxnum:
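+      // maxnum returns the other operand when one operand is NaN, so it is
+      // enough that either operand is known to be non-NaN and not ordered
+      // less than zero.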
+ return (isKnownNeverNaN(I->getOperand(0), TLI) &&
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI,
+ SignBitOnly, Depth + 1)) ||
+ (isKnownNeverNaN(I->getOperand(1), TLI) &&
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI,
+ SignBitOnly, Depth + 1));
+
+ case Intrinsic::maximum:
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1) ||
cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
Depth + 1);
case Intrinsic::minnum:
+ case Intrinsic::minimum:
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1) &&
cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
@@ -2882,7 +3024,8 @@ bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
}
-bool llvm::isKnownNeverNaN(const Value *V) {
+bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
+ unsigned Depth) {
assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");
// If we're told that NaNs won't happen, assume they won't.
@@ -2890,13 +3033,60 @@ bool llvm::isKnownNeverNaN(const Value *V) {
if (FPMathOp->hasNoNaNs())
return true;
- // TODO: Handle instructions and potentially recurse like other 'isKnown'
- // functions. For example, the result of sitofp is never NaN.
-
// Handle scalar constants.
if (auto *CFP = dyn_cast<ConstantFP>(V))
return !CFP->isNaN();
+ if (Depth == MaxDepth)
+ return false;
+
+ if (auto *Inst = dyn_cast<Instruction>(V)) {
+ switch (Inst->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FSub:
+ case Instruction::FDiv:
+ case Instruction::FRem: {
+ // TODO: Need isKnownNeverInfinity
+ return false;
+ }
+ case Instruction::Select: {
+ return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
+ isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1);
+ }
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ return true;
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1);
+ default:
+ break;
+ }
+ }
+
+ if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::canonicalize:
+ case Intrinsic::fabs:
+ case Intrinsic::copysign:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::floor:
+ case Intrinsic::ceil:
+ case Intrinsic::trunc:
+ case Intrinsic::rint:
+ case Intrinsic::nearbyint:
+ case Intrinsic::round:
+ return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1);
+ case Intrinsic::sqrt:
+ return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
+ CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
+ default:
+ return false;
+ }
+ }
+
// Bail out for constant expressions, but try to handle vector constants.
if (!V->getType()->isVectorTy() || !isa<Constant>(V))
return false;
@@ -2917,62 +3107,92 @@ bool llvm::isKnownNeverNaN(const Value *V) {
return true;
}
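A rough standalone sketch of the recursion idea behind the new isKnownNeverNaN cases, using a toy Expr node in place of llvm::Value (the node kinds, operand layout, and depth limit here are illustrative assumptions, not the patch's API):

#include <cstdio>

// Toy expression node standing in for llvm::Value; the kinds mirror a few of
// the cases handled above. Select carries only its two value operands here.
struct Expr {
  enum Kind { ConstNonNaN, SIToFP, FPExt, Select, FAdd } K;
  const Expr *Ops[2] = {nullptr, nullptr};
};

static bool knownNeverNaN(const Expr *E, unsigned Depth = 0) {
  if (!E || Depth == 6)                 // conservative depth cut-off
    return false;
  switch (E->K) {
  case Expr::ConstNonNaN:
  case Expr::SIToFP:
    return true;                        // int-to-fp never produces NaN
  case Expr::FPExt:
    return knownNeverNaN(E->Ops[0], Depth + 1);
  case Expr::Select:                    // both selected values must be NaN-free
    return knownNeverNaN(E->Ops[0], Depth + 1) &&
           knownNeverNaN(E->Ops[1], Depth + 1);
  case Expr::FAdd:
    return false;                       // inf + -inf could be NaN
  }
  return false;
}

int main() {
  Expr C{Expr::ConstNonNaN}, I{Expr::SIToFP};
  Expr S{Expr::Select, {&C, &I}};
  std::printf("select(const, sitofp) never NaN: %d\n", knownNeverNaN(&S));
}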
-/// If the specified value can be set by repeating the same byte in memory,
-/// return the i8 value that it is represented with. This is
-/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
-/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
-/// byte store (e.g. i16 0x1234), return null.
Value *llvm::isBytewiseValue(Value *V) {
+
// All byte-wide stores are splatable, even of arbitrary variables.
- if (V->getType()->isIntegerTy(8)) return V;
+ if (V->getType()->isIntegerTy(8))
+ return V;
+
+ LLVMContext &Ctx = V->getContext();
+
+  // Undef doesn't care.
+ auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
+ if (isa<UndefValue>(V))
+ return UndefInt8;
+
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C) {
+ // Conceptually, we could handle things like:
+ // %a = zext i8 %X to i16
+ // %b = shl i16 %a, 8
+ // %c = or i16 %a, %b
+ // but until there is an example that actually needs this, it doesn't seem
+ // worth worrying about.
+ return nullptr;
+ }
// Handle 'null' ConstantArrayZero etc.
- if (Constant *C = dyn_cast<Constant>(V))
- if (C->isNullValue())
- return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+ if (C->isNullValue())
+ return Constant::getNullValue(Type::getInt8Ty(Ctx));
- // Constant float and double values can be handled as integer values if the
+ // Constant floating-point values can be handled as integer values if the
// corresponding integer value is "byteable". An important case is 0.0.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- if (CFP->getType()->isFloatTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
- if (CFP->getType()->isDoubleTy())
- V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ Type *Ty = nullptr;
+ if (CFP->getType()->isHalfTy())
+ Ty = Type::getInt16Ty(Ctx);
+ else if (CFP->getType()->isFloatTy())
+ Ty = Type::getInt32Ty(Ctx);
+ else if (CFP->getType()->isDoubleTy())
+ Ty = Type::getInt64Ty(Ctx);
// Don't handle long double formats, which have strange constraints.
+ return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
}
// We can handle constant integers that are multiple of 8 bits.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
if (CI->getBitWidth() % 8 == 0) {
assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
-
if (!CI->getValue().isSplat(8))
return nullptr;
- return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
+ return ConstantInt::get(Ctx, CI->getValue().trunc(8));
}
}
- // A ConstantDataArray/Vector is splatable if all its members are equal and
- // also splatable.
- if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
- Value *Elt = CA->getElementAsConstant(0);
- Value *Val = isBytewiseValue(Elt);
- if (!Val)
+ auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
+ if (LHS == RHS)
+ return LHS;
+ if (!LHS || !RHS)
return nullptr;
+ if (LHS == UndefInt8)
+ return RHS;
+ if (RHS == UndefInt8)
+ return LHS;
+ return nullptr;
+ };
- for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
- if (CA->getElementAsConstant(I) != Elt)
+ if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
+ Value *Val = UndefInt8;
+ for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
+ if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
return nullptr;
+ return Val;
+ }
+ if (isa<ConstantVector>(C)) {
+ Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
+ return Splat ? isBytewiseValue(Splat) : nullptr;
+ }
+
+ if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+ Value *Val = UndefInt8;
+ for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
+ if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
+ return nullptr;
return Val;
}
- // Conceptually, we could handle things like:
- // %a = zext i8 %X to i16
- // %b = shl i16 %a, 8
- // %c = or i16 %a, %b
- // but until there is an example that actually needs this, it doesn't seem
- // worth worrying about.
+ // Don't try to handle the handful of other constants.
return nullptr;
}
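A standalone sketch of the byte-splat test for plain integers (std::optional stands in for the null return; the sample bit patterns are the classic examples from the doc comment this patch removes):

#include <cstdint>
#include <cstdio>
#include <optional>

// Return the repeated byte if every byte of V (of the given width) is equal,
// mirroring the integer case above; std::nullopt plays the role of nullptr.
static std::optional<uint8_t> bytewiseValue(uint64_t V, unsigned Bytes) {
  uint8_t B = V & 0xff;
  for (unsigned I = 1; I < Bytes; ++I)
    if (((V >> (8 * I)) & 0xff) != B)
      return std::nullopt;
  return B;
}

int main() {
  struct { uint64_t V; unsigned Bytes; } Tests[] = {
      {0x0000000000000000, 8},   // double 0.0 bit pattern
      {0xffffffff, 4},           // i32 -1
      {0xf0f0, 2},               // i16 0xF0F0
      {0x1234, 2},               // i16 0x1234: not splatable
  };
  for (auto &T : Tests) {
    auto R = bytewiseValue(T.V, T.Bytes);
    if (R)
      std::printf("0x%llx -> splat of 0x%02x\n", (unsigned long long)T.V,
                  (unsigned)*R);
    else
      std::printf("0x%llx -> not byte-splatable\n", (unsigned long long)T.V);
  }
}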
@@ -3169,7 +3389,14 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
- ByteOffset += GEPOffset.getSExtValue();
+ APInt OrigByteOffset(ByteOffset);
+ ByteOffset += GEPOffset.sextOrTrunc(ByteOffset.getBitWidth());
+ if (ByteOffset.getMinSignedBits() > 64) {
+ // Stop traversal if the pointer offset wouldn't fit into int64_t
+ // (this should be removed if Offset is updated to an APInt)
+ ByteOffset = OrigByteOffset;
+ break;
+ }
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
@@ -3397,21 +3624,21 @@ uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
return Len == ~0ULL ? 1 : Len;
}
-const Value *llvm::getArgumentAliasingToReturnedPointer(ImmutableCallSite CS) {
- assert(CS &&
- "getArgumentAliasingToReturnedPointer only works on nonnull CallSite");
- if (const Value *RV = CS.getReturnedArgOperand())
+const Value *llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call) {
+ assert(Call &&
+ "getArgumentAliasingToReturnedPointer only works on nonnull calls");
+ if (const Value *RV = Call->getReturnedArgOperand())
return RV;
  // This can be used only as an aliasing property.
- if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(CS))
- return CS.getArgOperand(0);
+ if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call))
+ return Call->getArgOperand(0);
return nullptr;
}
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- ImmutableCallSite CS) {
- return CS.getIntrinsicID() == Intrinsic::launder_invariant_group ||
- CS.getIntrinsicID() == Intrinsic::strip_invariant_group;
+ const CallBase *Call) {
+ return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
+ Call->getIntrinsicID() == Intrinsic::strip_invariant_group;
}
/// \p PN defines a loop-variant pointer to an object. Check if the
@@ -3459,7 +3686,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// An alloca can't be further simplified.
return V;
} else {
- if (auto CS = CallSite(V)) {
+ if (auto *Call = dyn_cast<CallBase>(V)) {
// CaptureTracking can know about special capturing properties of some
// intrinsics like launder.invariant.group, that can't be expressed with
// the attributes, but have properties like returning aliasing pointer.
@@ -3469,7 +3696,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
// because it should be in sync with CaptureTracking. Not using it may
// cause weird miscompilations where 2 aliasing pointers are assumed to
// noalias.
- if (auto *RP = getArgumentAliasingToReturnedPointer(CS)) {
+ if (auto *RP = getArgumentAliasingToReturnedPointer(Call)) {
V = RP;
continue;
}
@@ -3602,8 +3829,7 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
if (!II) return false;
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
}
return true;
@@ -3700,12 +3926,10 @@ bool llvm::mayBeMemoryDependent(const Instruction &I) {
return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
}
-OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+OverflowResult llvm::computeOverflowForUnsignedMul(
+ const Value *LHS, const Value *RHS, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -3715,8 +3939,10 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
KnownBits LHSKnown(BitWidth);
KnownBits RHSKnown(BitWidth);
- computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
- computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
+ computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
+ UseInstrInfo);
+ computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT, nullptr,
+ UseInstrInfo);
// Note that underestimating the number of zero bits gives a more
// conservative answer.
unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
@@ -3747,12 +3973,11 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
return OverflowResult::MayOverflow;
}
-OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
+OverflowResult
+llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT, bool UseInstrInfo) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -3781,33 +4006,33 @@ OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
// product is exactly the minimum negative number.
// E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
// For simplicity we just check if at least one side is not negative.
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
return OverflowResult::NeverOverflows;
}
return OverflowResult::MayOverflow;
}
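To make the corner case in the comment above concrete, a small standalone check (plain C++, no LLVM types) that the 17-sign-bit example really does land on the minimum negative number:

#include <cstdint>
#include <cstdio>

int main() {
  // The corner case noted above: i16 operands with 8 and 9 sign bits
  // respectively (17 in total), both negative, whose product is exactly 2^15.
  // Truncated to 16 bits that is 0x8000, i.e. the minimum negative number,
  // so the sign of the narrow result flips despite the sign-bit count.
  int16_t A = -256;                        // 0xff00, 8 sign bits
  int16_t B = -128;                        // 0xff80, 9 sign bits
  int32_t Wide = (int32_t)A * (int32_t)B;  // 32768 in full precision
  std::printf("wide = %d, low 16 bits = 0x%04x\n", Wide,
              (unsigned)(Wide & 0xffff));
}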
-OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
- const Value *RHS,
- const DataLayout &DL,
- AssumptionCache *AC,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
+OverflowResult llvm::computeOverflowForUnsignedAdd(
+ const Value *LHS, const Value *RHS, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
+ nullptr, UseInstrInfo);
if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
// The sign bit is set in both cases: this MUST overflow.
- // Create a simple add instruction, and insert it into the struct.
return OverflowResult::AlwaysOverflows;
}
if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
// The sign bit is clear in both cases: this CANNOT overflow.
- // Create a simple add instruction, and insert it into the struct.
return OverflowResult::NeverOverflows;
}
}
@@ -3924,11 +4149,18 @@ OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
- KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
- if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
- return OverflowResult::NeverOverflows;
+ if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
+ KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+
+ // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
+ if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
+ return OverflowResult::NeverOverflows;
+
+ // If the LHS is non-negative and the RHS negative, we always wrap.
+ if (LHSKnown.isNonNegative() && RHSKnown.isNegative())
+ return OverflowResult::AlwaysOverflows;
+ }
return OverflowResult::MayOverflow;
}
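A quick standalone illustration of the sign-bit reasoning above for unsigned subtraction, using i8-sized values chosen for the example (known-negative means the sign bit is known set, known-non-negative means it is known clear):

#include <cstdint>
#include <cstdio>

int main() {
  // If the LHS sign bit is known set and the RHS sign bit is known clear,
  // then LHS >= 0x80 > 0x7f >= RHS, so the unsigned subtraction never wraps.
  uint8_t LHSNeg = 0x90, RHSNonNeg = 0x10;
  uint8_t NoWrap = LHSNeg - RHSNonNeg;      // 0x80, stays in range
  // In the opposite configuration LHS <= 0x7f < 0x80 <= RHS, so it always
  // wraps around zero.
  uint8_t LHSNonNeg = 0x10, RHSNeg = 0x90;
  uint8_t Wraps = LHSNonNeg - RHSNeg;       // 0x80 after wrapping
  std::printf("0x90 - 0x10 = 0x%02x (never wraps)\n", NoWrap);
  std::printf("0x10 - 0x90 = 0x%02x (always wraps)\n", Wraps);
}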
@@ -4241,12 +4473,34 @@ static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
if (auto *C = dyn_cast<ConstantFP>(V))
return !C->isNaN();
+
+ if (auto *C = dyn_cast<ConstantDataVector>(V)) {
+ if (!C->getElementType()->isFloatingPointTy())
+ return false;
+ for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
+ if (C->getElementAsAPFloat(I).isNaN())
+ return false;
+ }
+ return true;
+ }
+
return false;
}
static bool isKnownNonZero(const Value *V) {
if (auto *C = dyn_cast<ConstantFP>(V))
return !C->isZero();
+
+ if (auto *C = dyn_cast<ConstantDataVector>(V)) {
+ if (!C->getElementType()->isFloatingPointTy())
+ return false;
+ for (unsigned I = 0, E = C->getNumElements(); I < E; ++I) {
+ if (C->getElementAsAPFloat(I).isZero())
+ return false;
+ }
+ return true;
+ }
+
return false;
}
@@ -4538,6 +4792,27 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
Value *TrueVal, Value *FalseVal,
Value *&LHS, Value *&RHS,
unsigned Depth) {
+ if (CmpInst::isFPPredicate(Pred)) {
+ // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
+ // 0.0 operand, set the compare's 0.0 operands to that same value for the
+ // purpose of identifying min/max. Disregard vector constants with undefined
+    // elements because those cannot be back-propagated for analysis.
+ Value *OutputZeroVal = nullptr;
+ if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
+ !cast<Constant>(TrueVal)->containsUndefElement())
+ OutputZeroVal = TrueVal;
+ else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
+ !cast<Constant>(FalseVal)->containsUndefElement())
+ OutputZeroVal = FalseVal;
+
+ if (OutputZeroVal) {
+ if (match(CmpLHS, m_AnyZeroFP()))
+ CmpLHS = OutputZeroVal;
+ if (match(CmpRHS, m_AnyZeroFP()))
+ CmpRHS = OutputZeroVal;
+ }
+ }
+
LHS = CmpLHS;
RHS = CmpRHS;
@@ -4967,21 +5242,16 @@ static bool isMatchingOps(const Value *ALHS, const Value *ARHS,
return IsMatchingOps || IsSwappedOps;
}
-/// Return true if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS BRHS" is
-/// true. Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS
-/// BRHS" is false. Otherwise, return None if we can't infer anything.
+/// Return true if "icmp1 APred X, Y" implies "icmp2 BPred X, Y" is true.
+/// Return false if "icmp1 APred X, Y" implies "icmp2 BPred X, Y" is false.
+/// Otherwise, return None if we can't infer anything.
static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
- const Value *ALHS,
- const Value *ARHS,
CmpInst::Predicate BPred,
- const Value *BLHS,
- const Value *BRHS,
- bool IsSwappedOps) {
- // Canonicalize the operands so they're matching.
- if (IsSwappedOps) {
- std::swap(BLHS, BRHS);
+ bool AreSwappedOps) {
+ // Canonicalize the predicate as if the operands were not commuted.
+ if (AreSwappedOps)
BPred = ICmpInst::getSwappedPredicate(BPred);
- }
+
if (CmpInst::isImpliedTrueByMatchingCmp(APred, BPred))
return true;
if (CmpInst::isImpliedFalseByMatchingCmp(APred, BPred))
@@ -4990,15 +5260,14 @@ static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
return None;
}
-/// Return true if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS C2" is
-/// true. Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS
-/// C2" is false. Otherwise, return None if we can't infer anything.
+/// Return true if "icmp APred X, C1" implies "icmp BPred X, C2" is true.
+/// Return false if "icmp APred X, C1" implies "icmp BPred X, C2" is false.
+/// Otherwise, return None if we can't infer anything.
static Optional<bool>
-isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
+isImpliedCondMatchingImmOperands(CmpInst::Predicate APred,
const ConstantInt *C1,
CmpInst::Predicate BPred,
- const Value *BLHS, const ConstantInt *C2) {
- assert(ALHS == BLHS && "LHS operands must match.");
+ const ConstantInt *C2) {
ConstantRange DomCR =
ConstantRange::makeExactICmpRegion(APred, C1->getValue());
ConstantRange CR =
@@ -5030,10 +5299,10 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
ICmpInst::Predicate BPred = RHS->getPredicate();
// Can we infer anything when the two compares have matching operands?
- bool IsSwappedOps;
- if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, IsSwappedOps)) {
+ bool AreSwappedOps;
+ if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, AreSwappedOps)) {
if (Optional<bool> Implication = isImpliedCondMatchingOperands(
- APred, ALHS, ARHS, BPred, BLHS, BRHS, IsSwappedOps))
+ APred, BPred, AreSwappedOps))
return Implication;
// No amount of additional analysis will infer the second condition, so
// early exit.
@@ -5044,8 +5313,7 @@ static Optional<bool> isImpliedCondICmps(const ICmpInst *LHS,
// constants (not necessarily matching)?
if (ALHS == BLHS && isa<ConstantInt>(ARHS) && isa<ConstantInt>(BRHS)) {
if (Optional<bool> Implication = isImpliedCondMatchingImmOperands(
- APred, ALHS, cast<ConstantInt>(ARHS), BPred, BLHS,
- cast<ConstantInt>(BRHS)))
+ APred, cast<ConstantInt>(ARHS), BPred, cast<ConstantInt>(BRHS)))
return Implication;
// No amount of additional analysis will infer the second condition, so
// early exit.
@@ -5130,3 +5398,35 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
}
return None;
}
+
+Optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
+ const Instruction *ContextI,
+ const DataLayout &DL) {
+ assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
+ if (!ContextI || !ContextI->getParent())
+ return None;
+
+ // TODO: This is a poor/cheap way to determine dominance. Should we use a
+ // dominator tree (eg, from a SimplifyQuery) instead?
+ const BasicBlock *ContextBB = ContextI->getParent();
+ const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
+ if (!PredBB)
+ return None;
+
+ // We need a conditional branch in the predecessor.
+ Value *PredCond;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
+ return None;
+
+ // The branch should get simplified. Don't bother simplifying this condition.
+ if (TrueBB == FalseBB)
+ return None;
+
+ assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
+ "Predecessor block does not point to successor?");
+
+ // Is this condition implied by the predecessor condition?
+ bool CondIsTrue = TrueBB == ContextBB;
+ return isImpliedCondition(PredCond, Cond, DL, CondIsTrue);
+}
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index d73d24736439..5656a19d7e0d 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -25,16 +26,30 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
+#define DEBUG_TYPE "vectorutils"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-/// Identify if the intrinsic is trivially vectorizable.
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
+/// Maximum factor for an interleaved memory access.
+static cl::opt<unsigned> MaxInterleaveGroupFactor(
+ "max-interleave-group-factor", cl::Hidden,
+ cl::desc("Maximum factor for an interleaved access group (default = 8)"),
+ cl::init(8));
+
+/// Return true if all of the intrinsic's arguments and return type are scalars
+/// for the scalar form of the intrinsic and vectors for the vector form of the
+/// intrinsic.
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::sqrt:
+ case Intrinsic::bswap: // Begin integer bit-manipulation.
+ case Intrinsic::bitreverse:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
@@ -45,6 +60,8 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
@@ -52,15 +69,15 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
- case Intrinsic::bswap:
- case Intrinsic::bitreverse:
- case Intrinsic::ctpop:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
case Intrinsic::powi:
+ case Intrinsic::canonicalize:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
return true;
default:
return false;
@@ -270,9 +287,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
}
// Extract a value from a vector add operation with a constant zero.
- Value *Val = nullptr; Constant *Con = nullptr;
- if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
- if (Constant *Elt = Con->getAggregateElement(EltNo))
+ // TODO: Use getBinOpIdentity() to generalize this.
+ Value *Val; Constant *C;
+ if (match(V, m_Add(m_Value(Val), m_Constant(C))))
+ if (Constant *Elt = C->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
@@ -450,16 +468,100 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
return MinBWs;
}
+/// Add all access groups in @p AccGroups to @p List.
+template <typename ListT>
+static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
+ // Interpret an access group as a list containing itself.
+ if (AccGroups->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
+ List.insert(AccGroups);
+ return;
+ }
+
+ for (auto &AccGroupListOp : AccGroups->operands()) {
+ auto *Item = cast<MDNode>(AccGroupListOp.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ List.insert(Item);
+ }
+}
+
+MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
+ if (!AccGroups1)
+ return AccGroups2;
+ if (!AccGroups2)
+ return AccGroups1;
+ if (AccGroups1 == AccGroups2)
+ return AccGroups1;
+
+ SmallSetVector<Metadata *, 4> Union;
+ addToAccessGroupList(Union, AccGroups1);
+ addToAccessGroupList(Union, AccGroups2);
+
+ if (Union.size() == 0)
+ return nullptr;
+ if (Union.size() == 1)
+ return cast<MDNode>(Union.front());
+
+ LLVMContext &Ctx = AccGroups1->getContext();
+ return MDNode::get(Ctx, Union.getArrayRef());
+}
+
+MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
+ const Instruction *Inst2) {
+ bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
+ bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
+
+ if (!MayAccessMem1 && !MayAccessMem2)
+ return nullptr;
+ if (!MayAccessMem1)
+ return Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MayAccessMem2)
+ return Inst1->getMetadata(LLVMContext::MD_access_group);
+
+ MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
+ MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
+ if (!MD1 || !MD2)
+ return nullptr;
+ if (MD1 == MD2)
+ return MD1;
+
+ // Use set for scalable 'contains' check.
+ SmallPtrSet<Metadata *, 4> AccGroupSet2;
+ addToAccessGroupList(AccGroupSet2, MD2);
+
+ SmallVector<Metadata *, 4> Intersection;
+ if (MD1->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
+ if (AccGroupSet2.count(MD1))
+ Intersection.push_back(MD1);
+ } else {
+ for (const MDOperand &Node : MD1->operands()) {
+ auto *Item = cast<MDNode>(Node.get());
+ assert(isValidAsAccessGroup(Item) && "List item must be an access group");
+ if (AccGroupSet2.count(Item))
+ Intersection.push_back(Item);
+ }
+ }
+
+ if (Intersection.size() == 0)
+ return nullptr;
+ if (Intersection.size() == 1)
+ return cast<MDNode>(Intersection.front());
+
+ LLVMContext &Ctx = Inst1->getContext();
+ return MDNode::get(Ctx, Intersection);
+}
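A rough standalone sketch of the union/intersection semantics of access-group lists, with strings standing in for MDNode access groups (the group names are invented for the example; the real code operates on metadata nodes as above):

#include <cstdio>
#include <set>
#include <string>

using Group = std::string;          // stands in for an MDNode access group
using GroupList = std::set<Group>;  // a list node enumerating groups

// Union: the combined access may belong to either list.
static GroupList unite(const GroupList &A, const GroupList &B) {
  GroupList U = A;
  U.insert(B.begin(), B.end());
  return U;
}

// Intersection: only groups both instructions agree on survive, mirroring
// the intersection computed above.
static GroupList intersect(const GroupList &A, const GroupList &B) {
  GroupList I;
  for (const Group &G : A)
    if (B.count(G))
      I.insert(G);
  return I;
}

int main() {
  GroupList LoadGroups = {"access.group.A", "access.group.B"};
  GroupList StoreGroups = {"access.group.B"};
  std::printf("union size = %zu, intersection size = %zu\n",
              unite(LoadGroups, StoreGroups).size(),
              intersect(LoadGroups, StoreGroups).size());
}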
+
/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
Instruction *I0 = cast<Instruction>(VL[0]);
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
- for (auto Kind :
- {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) {
+ for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_access_group}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
@@ -480,6 +582,9 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
+ case LLVMContext::MD_access_group:
+ MD = intersectAccessGroups(Inst, IJ);
+ break;
default:
llvm_unreachable("unhandled metadata");
}
@@ -491,6 +596,36 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
return Inst;
}
+Constant *
+llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF,
+ const InterleaveGroup<Instruction> &Group) {
+ // All 1's means mask is not needed.
+ if (Group.getNumMembers() == Group.getFactor())
+ return nullptr;
+
+ // TODO: support reversed access.
+ assert(!Group.isReverse() && "Reversed group not supported.");
+
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < Group.getFactor(); ++j) {
+ unsigned HasMember = Group.getMember(j) ? 1 : 0;
+ Mask.push_back(Builder.getInt1(HasMember));
+ }
+
+ return ConstantVector::get(Mask);
+}
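A standalone sketch of the gap-mask layout produced above, for an assumed VF of 2 and an interleave group of factor 3 whose member at index 1 is missing:

#include <cstdio>
#include <vector>

int main() {
  // VF = 2 vector iterations of a factor-3 interleave group where the member
  // at index 1 is a gap; 1 = lane is accessed, 0 = lane is masked off.
  const unsigned VF = 2, Factor = 3;
  const bool HasMember[Factor] = {true, false, true};
  std::vector<int> Mask;
  for (unsigned i = 0; i < VF; ++i)
    for (unsigned j = 0; j < Factor; ++j)
      Mask.push_back(HasMember[j] ? 1 : 0);
  for (int Bit : Mask)
    std::printf("%d ", Bit);   // prints: 1 0 1 1 0 1
  std::puts("");
}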
+
+Constant *llvm::createReplicatedMask(IRBuilder<> &Builder,
+ unsigned ReplicationFactor, unsigned VF) {
+ SmallVector<Constant *, 16> MaskVec;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < ReplicationFactor; j++)
+ MaskVec.push_back(Builder.getInt32(i));
+
+ return ConstantVector::get(MaskVec);
+}
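Likewise, a tiny standalone sketch of the replicated mask shape for an assumed ReplicationFactor of 3 and VF of 2, which yields <0,0,0,1,1,1>:

#include <cstdio>

int main() {
  // Each of the VF lane indices is repeated ReplicationFactor times.
  const unsigned RF = 3, VF = 2;
  for (unsigned i = 0; i < VF; ++i)
    for (unsigned j = 0; j < RF; ++j)
      std::printf("%u ", i);   // prints: 0 0 0 1 1 1
  std::puts("");
}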
+
Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
unsigned NumVecs) {
SmallVector<Constant *, 16> Mask;
@@ -575,3 +710,364 @@ Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
return ResList[0];
}
+
+bool InterleavedAccessInfo::isStrided(int Stride) {
+ unsigned Factor = std::abs(Stride);
+ return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
+}
+
+void InterleavedAccessInfo::collectConstStrideAccesses(
+ MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
+ const ValueToValueMap &Strides) {
+ auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
+
+ // Since it's desired that the load/store instructions be maintained in
+ // "program order" for the interleaved access analysis, we have to visit the
+ // blocks in the loop in reverse postorder (i.e., in a topological order).
+ // Such an ordering will ensure that any load/store that may be executed
+ // before a second load/store will precede the second load/store in
+ // AccessStrideInfo.
+ LoopBlocksDFS DFS(TheLoop);
+ DFS.perform(LI);
+ for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
+ for (auto &I : *BB) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ auto *SI = dyn_cast<StoreInst>(&I);
+ if (!LI && !SI)
+ continue;
+
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
+ // pointers (even those that end up in groups with no gaps) will be overly
+ // conservative. For full groups, wrapping should be ok since if we would
+ // wrap around the address space we would do a memory access at nullptr
+ // even without the transformation. The wrapping checks are therefore
+ // deferred until after we've formed the interleaved groups.
+ int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false);
+
+ const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+
+ // An alignment of 0 means target ABI alignment.
+ unsigned Align = getLoadStoreAlignment(&I);
+ if (!Align)
+ Align = DL.getABITypeAlignment(PtrTy->getElementType());
+
+ AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+ }
+}
+
+// Analyze interleaved accesses and collect them into interleaved load and
+// store groups.
+//
+// When generating code for an interleaved load group, we effectively hoist all
+// loads in the group to the location of the first load in program order. When
+// generating code for an interleaved store group, we sink all stores to the
+// location of the last store. This code motion can change the order of load
+// and store instructions and may break dependences.
+//
+// The code generation strategy mentioned above ensures that we won't violate
+// any write-after-read (WAR) dependences.
+//
+// E.g., for the WAR dependence: a = A[i]; // (1)
+// A[i] = b; // (2)
+//
+// The store group of (2) is always inserted at or below (2), and the load
+// group of (1) is always inserted at or above (1). Thus, the instructions will
+// never be reordered. All other dependences are checked to ensure the
+// correctness of the instruction reordering.
+//
+// The algorithm visits all memory accesses in the loop in bottom-up program
+// order. Program order is established by traversing the blocks in the loop in
+// reverse postorder when collecting the accesses.
+//
+// We visit the memory accesses in bottom-up order because it can simplify the
+// construction of store groups in the presence of write-after-write (WAW)
+// dependences.
+//
+// E.g., for the WAW dependence: A[i] = a; // (1)
+// A[i] = b; // (2)
+// A[i + 1] = c; // (3)
+//
+// We will first create a store group with (3) and (2). (1) can't be added to
+// this group because it and (2) are dependent. However, (1) can be grouped
+// with other accesses that may precede it in program order. Note that a
+// bottom-up order does not imply that WAW dependences should not be checked.
+void InterleavedAccessInfo::analyzeInterleaving(
+ bool EnablePredicatedInterleavedMemAccesses) {
+ LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
+ const ValueToValueMap &Strides = LAI->getSymbolicStrides();
+
+ // Holds all accesses with a constant stride.
+ MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
+ collectConstStrideAccesses(AccessStrideInfo, Strides);
+
+ if (AccessStrideInfo.empty())
+ return;
+
+ // Collect the dependences in the loop.
+ collectDependences();
+
+ // Holds all interleaved store groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
+ // Holds all interleaved load groups temporarily.
+ SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
+
+ // Search in bottom-up program order for pairs of accesses (A and B) that can
+ // form interleaved load or store groups. In the algorithm below, access A
+ // precedes access B in program order. We initialize a group for B in the
+ // outer loop of the algorithm, and then in the inner loop, we attempt to
+ // insert each A into B's group if:
+ //
+ // 1. A and B have the same stride,
+ // 2. A and B have the same memory object size, and
+ // 3. A belongs in B's group according to its distance from B.
+ //
+ // Special care is taken to ensure group formation will not break any
+ // dependences.
+ for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
+ BI != E; ++BI) {
+ Instruction *B = BI->first;
+ StrideDescriptor DesB = BI->second;
+
+ // Initialize a group for B if it has an allowable stride. Even if we don't
+ // create a group for B, we continue with the bottom-up algorithm to ensure
+ // we don't break any of B's dependences.
+ InterleaveGroup<Instruction> *Group = nullptr;
+ if (isStrided(DesB.Stride) &&
+ (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
+ Group = getInterleaveGroup(B);
+ if (!Group) {
+ LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
+ << '\n');
+ Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
+ }
+ if (B->mayWriteToMemory())
+ StoreGroups.insert(Group);
+ else
+ LoadGroups.insert(Group);
+ }
+
+ for (auto AI = std::next(BI); AI != E; ++AI) {
+ Instruction *A = AI->first;
+ StrideDescriptor DesA = AI->second;
+
+ // Our code motion strategy implies that we can't have dependences
+ // between accesses in an interleaved group and other accesses located
+ // between the first and last member of the group. Note that this also
+ // means that a group can't have more than one member at a given offset.
+ // The accesses in a group can have dependences with other accesses, but
+ // we must ensure we don't extend the boundaries of the group such that
+ // we encompass those dependent accesses.
+ //
+ // For example, assume we have the sequence of accesses shown below in a
+ // stride-2 loop:
+ //
+ // (1, 2) is a group | A[i] = a; // (1)
+ // | A[i-1] = b; // (2) |
+ // A[i-3] = c; // (3)
+ // A[i] = d; // (4) | (2, 4) is not a group
+ //
+ // Because accesses (2) and (3) are dependent, we can group (2) with (1)
+ // but not with (4). If we did, the dependent access (3) would be within
+ // the boundaries of the (2, 4) group.
+ if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
+ // If a dependence exists and A is already in a group, we know that A
+ // must be a store since A precedes B and WAR dependences are allowed.
+ // Thus, A would be sunk below B. We release A's group to prevent this
+ // illegal code motion. A will then be free to form another group with
+ // instructions that precede it.
+ if (isInterleaved(A)) {
+ InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
+ StoreGroups.remove(StoreGroup);
+ releaseGroup(StoreGroup);
+ }
+
+ // If a dependence exists and A is not already in a group (or it was
+ // and we just released it), B might be hoisted above A (if B is a
+ // load) or another store might be sunk below A (if B is a store). In
+ // either case, we can't add additional instructions to B's group. B
+ // will only form a group with instructions that it precedes.
+ break;
+ }
+
+ // At this point, we've checked for illegal code motion. If either A or B
+ // isn't strided, there's nothing left to do.
+ if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
+ continue;
+
+ // Ignore A if it's already in a group or isn't the same kind of memory
+ // operation as B.
+      // Note that mayReadFromMemory() isn't mutually exclusive with
+      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
+      // here; canVectorizeMemory() should have returned false - except when
+      // we were asked for optimization remarks.
+ if (isInterleaved(A) ||
+ (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
+ (A->mayWriteToMemory() != B->mayWriteToMemory()))
+ continue;
+
+ // Check rules 1 and 2. Ignore A if its stride or size is different from
+ // that of B.
+ if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
+ continue;
+
+      // Ignore A if the memory objects of A and B don't belong to the same
+      // address space.
+ if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
+ continue;
+
+ // Calculate the distance from A to B.
+ const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
+ PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
+ if (!DistToB)
+ continue;
+ int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
+
+ // Check rule 3. Ignore A if its distance to B is not a multiple of the
+ // size.
+ if (DistanceToB % static_cast<int64_t>(DesB.Size))
+ continue;
+
+ // All members of a predicated interleave-group must have the same predicate,
+ // and currently must reside in the same BB.
+ BasicBlock *BlockA = A->getParent();
+ BasicBlock *BlockB = B->getParent();
+ if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
+ (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
+ continue;
+
+ // The index of A is the index of B plus A's distance to B in multiples
+ // of the size.
+ int IndexA =
+ Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
+
+ // Try to insert A into B's group.
+ if (Group->insertMember(A, IndexA, DesA.Align)) {
+ LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
+ << " into the interleave group with" << *B
+ << '\n');
+ InterleaveGroupMap[A] = Group;
+
+ // Set the first load in program order as the insert position.
+ if (A->mayReadFromMemory())
+ Group->setInsertPos(A);
+ }
+ } // Iteration over A accesses.
+ } // Iteration over B accesses.
+
+ // Remove interleaved store groups with gaps.
+ for (auto *Group : StoreGroups)
+ if (Group->getNumMembers() != Group->getFactor()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
+ releaseGroup(Group);
+ }
+ // Remove interleaved groups with gaps (currently only loads) whose memory
+ // accesses may wrap around. We have to revisit the getPtrStride analysis,
+ // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+ // not check wrapping (see documentation there).
+ // FORNOW we use Assume=false;
+ // TODO: Change to Assume=true but making sure we don't exceed the threshold
+ // of runtime SCEV assumptions checks (thereby potentially failing to
+ // vectorize altogether).
+ // Additional optional optimizations:
+ // TODO: If we are peeling the loop and we know that the first pointer doesn't
+ // wrap then we can deduce that all pointers in the group don't wrap.
+ // This means that we can forcefully peel the loop in order to only have to
+  // check the first pointer for no-wrap. When we change to Assume=true, we'll
+  // only need at most one runtime check per interleaved group.
+ for (auto *Group : LoadGroups) {
+    // Case 1: A full group. We can skip the checks; for full groups, if the
+    // wide load would wrap around the address space, we would do a memory
+    // access at nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+    // Case 2: If the first and last members of the group don't wrap, this
+    // implies that all the pointers in the group don't wrap.
+    // So we check only group member 0 (which is always guaranteed to exist)
+    // and group member Factor - 1; if the latter doesn't exist, we rely on
+    // peeling (if it is a non-reversed access -- see Case 3).
+ Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
+ if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "first group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
+ if (LastMember) {
+ Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "last group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ }
+ } else {
+ // Case 3: A non-reversed interleaved load group with gaps: We need
+ // to execute at least one scalar epilogue iteration. This will ensure
+ // we don't speculatively access memory out-of-bounds. We only need
+ // to look for a member at index factor - 1, since every group must have
+ // a member at index zero.
+ if (Group->isReverse()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "a reverse access with gaps.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ LLVM_DEBUG(
+ dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+ RequiresScalarEpilogue = true;
+ }
+ }
+}
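As a source-level illustration (an assumed example, not taken from this patch) of the access pattern the analysis above groups, a stride-2 loop whose two loads form a single factor-2 interleaved load group:

#include <cstddef>
#include <cstdio>

// A[2*i] and A[2*i+1] share a base pointer, have the same element size and
// stride, and differ by a constant distance of one element, so the analysis
// above would place them in one group (member indices 0 and 1); a vectorizer
// could then emit one wide load plus shuffles instead of two strided loads.
static void sumPairs(const int *A, int *Out, std::size_t N) {
  for (std::size_t i = 0; i < N; ++i) {
    int Even = A[2 * i];      // group member at index 0
    int Odd = A[2 * i + 1];   // group member at index 1
    Out[i] = Even + Odd;
  }
}

int main() {
  int A[8] = {1, 2, 3, 4, 5, 6, 7, 8}, Out[4];
  sumPairs(A, Out, 4);
  std::printf("%d %d %d %d\n", Out[0], Out[1], Out[2], Out[3]);
}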
+
+void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
+ // If no group had triggered the requirement to create an epilogue loop,
+ // there is nothing to do.
+ if (!requiresScalarEpilogue())
+ return;
+
+ // Avoid releasing a Group twice.
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
+ for (auto &I : InterleaveGroupMap) {
+ InterleaveGroup<Instruction> *Group = I.second;
+ if (Group->requiresScalarEpilogue())
+ DelSet.insert(Group);
+ }
+ for (auto *Ptr : DelSet) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Invalidate candidate interleaved group due to gaps that "
+ "require a scalar epilogue (not allowed under optsize) and cannot "
+ "be masked (not enabled). \n");
+ releaseGroup(Ptr);
+ }
+
+ RequiresScalarEpilogue = false;
+}
+
+template <typename InstT>
+void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
+ llvm_unreachable("addMetadata can only be used for Instruction");
+}
+
+namespace llvm {
+template <>
+void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
+ SmallVector<Value *, 4> VL;
+ std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
+ [](std::pair<int, Instruction *> p) { return p.second; });
+ propagateMetadata(NewInst, VL);
+}
+}