author    Dimitry Andric <dim@FreeBSD.org>    2015-01-18 16:17:27 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2015-01-18 16:17:27 +0000
commit    67c32a98315f785a9ec9d531c1f571a0196c7463 (patch)
tree      4abb9cbeecc7901726dd0b4a37369596c852e9ef /lib/Analysis
parent    9f61947910e6ab40de38e6b4034751ef1513200f (diff)
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp                 |   85
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp        |    8
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp               |  105
-rw-r--r--  lib/Analysis/Analysis.cpp                      |    2
-rw-r--r--  lib/Analysis/AssumptionCache.cpp               |  125
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp            |  269
-rw-r--r--  lib/Analysis/BlockFrequencyInfoImpl.cpp        |    3
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp         |    3
-rw-r--r--  lib/Analysis/CFG.cpp                           |    6
-rw-r--r--  lib/Analysis/CFGPrinter.cpp                    |   16
-rw-r--r--  lib/Analysis/CFLAliasAnalysis.cpp              | 1013
-rw-r--r--  lib/Analysis/CGSCCPassManager.cpp              |  103
-rw-r--r--  lib/Analysis/CMakeLists.txt                    |    4
-rw-r--r--  lib/Analysis/CaptureTracking.cpp               |    4
-rw-r--r--  lib/Analysis/CodeMetrics.cpp                   |   92
-rw-r--r--  lib/Analysis/ConstantFolding.cpp               |  121
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp            |   73
-rw-r--r--  lib/Analysis/FunctionTargetTransformInfo.cpp   |   50
-rw-r--r--  lib/Analysis/IPA/CMakeLists.txt                |    1
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp                 |    3
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp          |   11
-rw-r--r--  lib/Analysis/IPA/FindUsedTypes.cpp             |  100
-rw-r--r--  lib/Analysis/IPA/IPA.cpp                       |    1
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp                |   36
-rw-r--r--  lib/Analysis/IVUsers.cpp                       |    8
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp           |  877
-rw-r--r--  lib/Analysis/JumpInstrTableInfo.cpp            |   17
-rw-r--r--  lib/Analysis/LazyCallGraph.cpp                 |   15
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp                 |  553
-rw-r--r--  lib/Analysis/LibCallSemantics.cpp              |    4
-rw-r--r--  lib/Analysis/Lint.cpp                          |   30
-rw-r--r--  lib/Analysis/Loads.cpp                         |  192
-rw-r--r--  lib/Analysis/LoopInfo.cpp                      |    4
-rw-r--r--  lib/Analysis/LoopPass.cpp                      |   11
-rw-r--r--  lib/Analysis/MemDepPrinter.cpp                 |   28
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp                |   10
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp      |  251
-rw-r--r--  lib/Analysis/NoAliasAnalysis.cpp               |    5
-rw-r--r--  lib/Analysis/PHITransAddr.cpp                  |    6
-rw-r--r--  lib/Analysis/PtrUseVisitor.cpp                 |    2
-rw-r--r--  lib/Analysis/RegionInfo.cpp                    |    2
-rw-r--r--  lib/Analysis/RegionPass.cpp                    |    3
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp               | 1212
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp  |    4
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp       |    4
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp  |    2
-rw-r--r--  lib/Analysis/ScopedNoAliasAA.cpp               |  245
-rw-r--r--  lib/Analysis/StratifiedSets.h                  |  692
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp           |   67
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp        |   58
-rw-r--r--  lib/Analysis/ValueTracking.cpp                 | 1159
51 files changed, 5854 insertions(+), 1841 deletions(-)
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 8b8106b950a0..6eea8177418a 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -196,12 +196,11 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
if (!Arg->getType()->isPointerTy())
continue;
ModRefResult ArgMask;
- Location CS1Loc =
- getArgLocation(CS1, (unsigned) std::distance(CS1.arg_begin(), I),
- ArgMask);
- // ArgMask indicates what CS1 might do to CS1Loc; if CS1 might Mod
- // CS1Loc, then we care about either a Mod or a Ref by CS2. If CS1
- // might Ref, then we care only about a Mod by CS2.
+ Location CS1Loc = getArgLocation(
+ CS1, (unsigned)std::distance(CS1.arg_begin(), I), ArgMask);
+ // ArgMask indicates what CS1 might do to CS1Loc; if CS1 might Mod
+ // CS1Loc, then we care about either a Mod or a Ref by CS2. If CS1
+ // might Ref, then we care only about a Mod by CS2.
ModRefResult ArgR = getModRefInfo(CS2, CS1Loc);
if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) ||
((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef))
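// A worked illustration of the mask logic above, with hypothetical callees
// (annotation, not part of the patch):
//
//   void reader(const int *p) { (void)*p; } // ArgMask == Ref for p's memory
//   void writer(int *p)       { *p = 42; }  // a callee that may Mod it
//
// If CS1 = reader(p), only a Mod by CS2 (e.g. writer(p)) conflicts; two
// readers are independent. If CS1 may Mod p's memory, either a Ref or a Mod
// by CS2 conflicts -- exactly the disjunction tested above.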
@@ -252,61 +251,73 @@ AliasAnalysis::getModRefBehavior(const Function *F) {
//===----------------------------------------------------------------------===//
AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+ AAMDNodes AATags;
+ LI->getAAMetadata(AATags);
+
return Location(LI->getPointerOperand(),
- getTypeStoreSize(LI->getType()),
- LI->getMetadata(LLVMContext::MD_tbaa));
+ getTypeStoreSize(LI->getType()), AATags);
}
AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+ AAMDNodes AATags;
+ SI->getAAMetadata(AATags);
+
return Location(SI->getPointerOperand(),
- getTypeStoreSize(SI->getValueOperand()->getType()),
- SI->getMetadata(LLVMContext::MD_tbaa));
+ getTypeStoreSize(SI->getValueOperand()->getType()), AATags);
}
AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
- return Location(VI->getPointerOperand(),
- UnknownSize,
- VI->getMetadata(LLVMContext::MD_tbaa));
+ AAMDNodes AATags;
+ VI->getAAMetadata(AATags);
+
+ return Location(VI->getPointerOperand(), UnknownSize, AATags);
}
AliasAnalysis::Location
AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) {
+ AAMDNodes AATags;
+ CXI->getAAMetadata(AATags);
+
return Location(CXI->getPointerOperand(),
getTypeStoreSize(CXI->getCompareOperand()->getType()),
- CXI->getMetadata(LLVMContext::MD_tbaa));
+ AATags);
}
AliasAnalysis::Location
AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) {
+ AAMDNodes AATags;
+ RMWI->getAAMetadata(AATags);
+
return Location(RMWI->getPointerOperand(),
- getTypeStoreSize(RMWI->getValOperand()->getType()),
- RMWI->getMetadata(LLVMContext::MD_tbaa));
+ getTypeStoreSize(RMWI->getValOperand()->getType()), AATags);
}
-AliasAnalysis::Location
+AliasAnalysis::Location
AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
uint64_t Size = UnknownSize;
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
Size = C->getValue().getZExtValue();
- // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
- MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+ AAMDNodes AATags;
+ MTI->getAAMetadata(AATags);
- return Location(MTI->getRawSource(), Size, TBAATag);
+ return Location(MTI->getRawSource(), Size, AATags);
}
-AliasAnalysis::Location
+AliasAnalysis::Location
AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
uint64_t Size = UnknownSize;
if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
Size = C->getValue().getZExtValue();
- // memcpy/memmove can have TBAA tags. For memcpy, they apply
+ // memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
- MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
-
- return Location(MTI->getRawDest(), Size, TBAATag);
+ AAMDNodes AATags;
+ MTI->getAAMetadata(AATags);
+
+ return Location(MTI->getRawDest(), Size, AATags);
}
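// For reference (a sketch of the AAMDNodes bundle these getLocation changes
// rely on, as defined in this LLVM era; annotation, not part of the patch):
// getAAMetadata() fills in all three AA-relevant metadata kinds, not just
// the single !tbaa node used before:
//
//   struct AAMDNodes {
//     MDNode *TBAA;    // !tbaa        - type-based alias info
//     MDNode *Scope;   // !alias.scope - scoped-noalias scope list
//     MDNode *NoAlias; // !noalias     - scoped-noalias exclusion list
//   };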
@@ -428,7 +439,7 @@ AliasAnalysis::callCapturesBefore(const Instruction *I,
// assume that the call could touch the pointer, even though it doesn't
// escape.
if (isNoAlias(AliasAnalysis::Location(*CI),
- AliasAnalysis::Location(Object)))
+ AliasAnalysis::Location(Object)))
continue;
if (CS.doesNotAccessMemory(ArgNo))
continue;
@@ -472,21 +483,22 @@ uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
}
/// canBasicBlockModify - Return true if it is possible for execution of the
-/// specified basic block to modify the value pointed to by Ptr.
+/// specified basic block to modify the location Loc.
///
bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
const Location &Loc) {
- return canInstructionRangeModify(BB.front(), BB.back(), Loc);
+ return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod);
}
-/// canInstructionRangeModify - Return true if it is possible for the execution
-/// of the specified instructions to modify the value pointed to by Ptr. The
-/// instructions to consider are all of the instructions in the range of [I1,I2]
-/// INCLUSIVE. I1 and I2 must be in the same basic block.
-///
-bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+/// canInstructionRangeModRef - Return true if it is possible for the
+/// execution of the specified instructions to mod/ref (according to the
+/// mode) the location Loc. The instructions to consider are all
+/// of the instructions in the range of [I1,I2] INCLUSIVE.
+/// I1 and I2 must be in the same basic block.
+bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
const Instruction &I2,
- const Location &Loc) {
+ const Location &Loc,
+ const ModRefResult Mode) {
assert(I1.getParent() == I2.getParent() &&
"Instructions not in same basic block!");
BasicBlock::const_iterator I = &I1;
@@ -494,7 +506,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
++E; // Convert from inclusive to exclusive range.
for (; I != E; ++I) // Check every instruction in range
- if (getModRefInfo(I, Loc) & Mod)
+ if (getModRefInfo(I, Loc) & Mode)
return true;
return false;
}
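// Usage sketch for the renamed API (hypothetical caller, not part of the
// patch):
//
//   // The old canInstructionRangeModify(I1, I2, Loc) becomes a Mod query:
//   bool MayWrite = AA.canInstructionRangeModRef(I1, I2, Loc,
//                                                AliasAnalysis::Mod);
//   // Reads (or both) can now be asked about the same way:
//   bool MayRead  = AA.canInstructionRangeModRef(I1, I2, Loc,
//                                                AliasAnalysis::Ref);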
@@ -545,4 +557,3 @@ bool llvm::isIdentifiedFunctionLocal(const Value *V)
{
return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V);
}
-
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index d9fa5a500f36..fe4bd4cc2109 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -43,7 +43,7 @@ static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
-static cl::opt<bool> EvalTBAA("evaluate-tbaa", cl::ReallyHidden);
+static cl::opt<bool> EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden);
namespace {
class AAEval : public FunctionPass {
@@ -153,9 +153,9 @@ bool AAEval::runOnFunction(Function &F) {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
if (I->getType()->isPointerTy()) // Add all pointer instructions.
Pointers.insert(&*I);
- if (EvalTBAA && isa<LoadInst>(&*I))
+ if (EvalAAMD && isa<LoadInst>(&*I))
Loads.insert(&*I);
- if (EvalTBAA && isa<StoreInst>(&*I))
+ if (EvalAAMD && isa<StoreInst>(&*I))
Stores.insert(&*I);
Instruction &Inst = *I;
if (CallSite CS = cast<Value>(&Inst)) {
@@ -213,7 +213,7 @@ bool AAEval::runOnFunction(Function &F) {
}
}
- if (EvalTBAA) {
+ if (EvalAAMD) {
// iterate over all pairs of load, store
for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end();
I1 != E; ++I1) {
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 75b6be06aa8e..45442b025629 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -47,10 +47,10 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
// If the pointers are not a must-alias pair, this set becomes a may alias.
if (AA.alias(AliasAnalysis::Location(L->getValue(),
L->getSize(),
- L->getTBAAInfo()),
+ L->getAAInfo()),
AliasAnalysis::Location(R->getValue(),
R->getSize(),
- R->getTBAAInfo()))
+ R->getAAInfo()))
!= AliasAnalysis::MustAlias)
AliasTy = MayAlias;
}
@@ -97,7 +97,7 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) {
}
void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
- uint64_t Size, const MDNode *TBAAInfo,
+ uint64_t Size, const AAMDNodes &AAInfo,
bool KnownMustAlias) {
assert(!Entry.hasAliasSet() && "Entry already in set!");
@@ -107,17 +107,17 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
AliasAnalysis &AA = AST.getAliasAnalysis();
AliasAnalysis::AliasResult Result =
AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
- P->getTBAAInfo()),
- AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+ P->getAAInfo()),
+ AliasAnalysis::Location(Entry.getValue(), Size, AAInfo));
if (Result != AliasAnalysis::MustAlias)
AliasTy = MayAlias;
else // First entry of must alias must have maximum size!
- P->updateSizeAndTBAAInfo(Size, TBAAInfo);
+ P->updateSizeAndAAInfo(Size, AAInfo);
assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!");
}
Entry.setAliasSet(this);
- Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+ Entry.updateSizeAndAAInfo(Size, AAInfo);
// Add it to the end of the list...
assert(*PtrListEnd == nullptr && "End of list is not null?");
@@ -147,7 +147,7 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
/// alias one of the members in the set.
///
bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
AliasAnalysis &AA) const {
if (AliasTy == MustAlias) {
assert(UnknownInsts.empty() && "Illegal must alias set!");
@@ -158,23 +158,23 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
assert(SomePtr && "Empty must-alias set??");
return AA.alias(AliasAnalysis::Location(SomePtr->getValue(),
SomePtr->getSize(),
- SomePtr->getTBAAInfo()),
- AliasAnalysis::Location(Ptr, Size, TBAAInfo));
+ SomePtr->getAAInfo()),
+ AliasAnalysis::Location(Ptr, Size, AAInfo));
}
// If this is a may-alias set, we have to check all of the pointers in the set
// to be sure it doesn't alias the set...
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo),
+ if (AA.alias(AliasAnalysis::Location(Ptr, Size, AAInfo),
AliasAnalysis::Location(I.getPointer(), I.getSize(),
- I.getTBAAInfo())))
+ I.getAAInfo())))
return true;
// Check the unknown instructions...
if (!UnknownInsts.empty()) {
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
if (AA.getModRefInfo(UnknownInsts[i],
- AliasAnalysis::Location(Ptr, Size, TBAAInfo)) !=
+ AliasAnalysis::Location(Ptr, Size, AAInfo)) !=
AliasAnalysis::NoModRef)
return true;
}
@@ -197,7 +197,7 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
for (iterator I = begin(), E = end(); I != E; ++I)
if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(),
I.getSize(),
- I.getTBAAInfo())) !=
+ I.getAAInfo())) !=
AliasAnalysis::NoModRef)
return true;
@@ -223,11 +223,11 @@ void AliasSetTracker::clear() {
///
AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
uint64_t Size,
- const MDNode *TBAAInfo) {
+ const AAMDNodes &AAInfo) {
AliasSet *FoundSet = nullptr;
for (iterator I = begin(), E = end(); I != E;) {
iterator Cur = I++;
- if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
+ if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;
if (!FoundSet) { // If this is the first alias set ptr can go into.
FoundSet = Cur; // Remember it.
@@ -243,14 +243,19 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
/// this alias set, false otherwise. This does not modify the AST object or
/// alias sets.
bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
- const MDNode *TBAAInfo) const {
+ const AAMDNodes &AAInfo) const {
for (const_iterator I = begin(), E = end(); I != E; ++I)
- if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA))
+ if (!I->Forward && I->aliasesPointer(Ptr, Size, AAInfo, AA))
return true;
return false;
}
-
+bool AliasSetTracker::containsUnknown(Instruction *Inst) const {
+ for (const_iterator I = begin(), E = end(); I != E; ++I)
+ if (!I->Forward && I->aliasesUnknownInst(Inst, AA))
+ return true;
+ return false;
+}
AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
AliasSet *FoundSet = nullptr;
@@ -272,67 +277,75 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
/// getAliasSetForPointer - Return the alias set that the specified pointer
/// lives in.
AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
- const MDNode *TBAAInfo,
+ const AAMDNodes &AAInfo,
bool *New) {
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
// Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
- Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
+ Entry.updateSizeAndAAInfo(Size, AAInfo);
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
}
- if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) {
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, AAInfo)) {
// Add it to the alias set it aliases.
- AS->addPointer(*this, Entry, Size, TBAAInfo);
+ AS->addPointer(*this, Entry, Size, AAInfo);
return *AS;
}
if (New) *New = true;
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
- AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo);
+ AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
return AliasSets.back();
}
-bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
bool NewPtr;
- addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr);
+ addPointer(Ptr, Size, AAInfo, AliasSet::NoModRef, NewPtr);
return NewPtr;
}
bool AliasSetTracker::add(LoadInst *LI) {
if (LI->getOrdering() > Monotonic) return addUnknown(LI);
+
+ AAMDNodes AAInfo;
+ LI->getAAMetadata(AAInfo);
+
AliasSet::AccessType ATy = AliasSet::Refs;
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
AA.getTypeStoreSize(LI->getType()),
- LI->getMetadata(LLVMContext::MD_tbaa),
- ATy, NewPtr);
+ AAInfo, ATy, NewPtr);
if (LI->isVolatile()) AS.setVolatile();
return NewPtr;
}
bool AliasSetTracker::add(StoreInst *SI) {
if (SI->getOrdering() > Monotonic) return addUnknown(SI);
+
+ AAMDNodes AAInfo;
+ SI->getAAMetadata(AAInfo);
+
AliasSet::AccessType ATy = AliasSet::Mods;
bool NewPtr;
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
AA.getTypeStoreSize(Val->getType()),
- SI->getMetadata(LLVMContext::MD_tbaa),
- ATy, NewPtr);
+ AAInfo, ATy, NewPtr);
if (SI->isVolatile()) AS.setVolatile();
return NewPtr;
}
bool AliasSetTracker::add(VAArgInst *VAAI) {
+ AAMDNodes AAInfo;
+ VAAI->getAAMetadata(AAInfo);
+
bool NewPtr;
addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize,
- VAAI->getMetadata(LLVMContext::MD_tbaa),
- AliasSet::ModRef, NewPtr);
+ AAInfo, AliasSet::ModRef, NewPtr);
return NewPtr;
}
@@ -390,7 +403,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
bool X;
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
- ASI.getTBAAInfo(),
+ ASI.getAAInfo(),
(AliasSet::AccessType)AS.AccessTy, X);
if (AS.isVolatile()) NewAS.setVolatile();
}
@@ -429,8 +442,8 @@ void AliasSetTracker::remove(AliasSet &AS) {
}
bool
-AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
- AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
+ AliasSet *AS = findAliasSetForPointer(Ptr, Size, AAInfo);
if (!AS) return false;
remove(*AS);
return true;
@@ -438,8 +451,11 @@ AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
bool AliasSetTracker::remove(LoadInst *LI) {
uint64_t Size = AA.getTypeStoreSize(LI->getType());
- const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
- AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
+
+ AAMDNodes AAInfo;
+ LI->getAAMetadata(AAInfo);
+
+ AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, AAInfo);
if (!AS) return false;
remove(*AS);
return true;
@@ -447,17 +463,22 @@ bool AliasSetTracker::remove(LoadInst *LI) {
bool AliasSetTracker::remove(StoreInst *SI) {
uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
- const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
- AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
+
+ AAMDNodes AAInfo;
+ SI->getAAMetadata(AAInfo);
+
+ AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, AAInfo);
if (!AS) return false;
remove(*AS);
return true;
}
bool AliasSetTracker::remove(VAArgInst *VAAI) {
+ AAMDNodes AAInfo;
+ VAAI->getAAMetadata(AAInfo);
+
AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
- AliasAnalysis::UnknownSize,
- VAAI->getMetadata(LLVMContext::MD_tbaa));
+ AliasAnalysis::UnknownSize, AAInfo);
if (!AS) return false;
remove(*AS);
return true;
@@ -546,7 +567,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
I = PointerMap.find_as(From);
AliasSet *AS = I->second->getAliasSet(*this);
AS->addPointer(*this, Entry, I->second->getSize(),
- I->second->getTBAAInfo(),
+ I->second->getAAInfo(),
true);
}
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index c58012f3a8c3..f64bf0ec2c47 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGPrinterPass(Registry);
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
+ initializeCFLAliasAnalysisPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDelinearizationPass(Registry);
initializeDominanceFrontierPass(Registry);
@@ -66,6 +67,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeScalarEvolutionAliasAnalysisPass(Registry);
initializeTargetTransformInfoAnalysisGroup(Registry);
initializeTypeBasedAliasAnalysisPass(Registry);
+ initializeScopedNoAliasAAPass(Registry);
}
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
new file mode 100644
index 000000000000..da5ba18fc43b
--- /dev/null
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -0,0 +1,125 @@
+//===- AssumptionCache.cpp - Cache finding @llvm.assume calls -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that keeps track of @llvm.assume intrinsics in
+// the functions of a module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+void AssumptionCache::scanFunction() {
+ assert(!Scanned && "Tried to scan the function twice!");
+ assert(AssumeHandles.empty() && "Already have assumes when scanning!");
+
+ // Go through all instructions in all blocks, add all calls to @llvm.assume
+ // to this cache.
+ for (BasicBlock &B : F)
+ for (Instruction &II : B)
+ if (match(&II, m_Intrinsic<Intrinsic::assume>()))
+ AssumeHandles.push_back(&II);
+
+ // Mark the scan as complete.
+ Scanned = true;
+}
+
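// Usage sketch for a hypothetical client pass (annotation, not part of the
// patch): after declaring AU.addRequired<AssumptionCacheTracker>(), the
// per-function cache and its registered @llvm.assume calls are reached via:
//
//   AssumptionCache &AC =
//       getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
//   for (auto &VH : AC.assumptions())
//     if (auto *CI = cast_or_null<CallInst>(VH))
//       ; // CI is a live call to @llvm.assume within F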
+void AssumptionCache::registerAssumption(CallInst *CI) {
+ assert(match(CI, m_Intrinsic<Intrinsic::assume>()) &&
+ "Registered call does not call @llvm.assume");
+
+ // If we haven't scanned the function yet, just drop this assumption. It will
+ // be found when we scan later.
+ if (!Scanned)
+ return;
+
+ AssumeHandles.push_back(CI);
+
+#ifndef NDEBUG
+ assert(CI->getParent() &&
+ "Cannot register @llvm.assume call not in a basic block");
+ assert(&F == CI->getParent()->getParent() &&
+ "Cannot register @llvm.assume call not in this function");
+
+ // We expect the number of assumptions to be small, so in an asserts build
+ // check that we don't accumulate duplicates and that all assumptions point
+ // to the same function.
+ SmallPtrSet<Value *, 16> AssumptionSet;
+ for (auto &VH : AssumeHandles) {
+ if (!VH)
+ continue;
+
+ assert(&F == cast<Instruction>(VH)->getParent()->getParent() &&
+ "Cached assumption not inside this function!");
+ assert(match(cast<CallInst>(VH), m_Intrinsic<Intrinsic::assume>()) &&
+ "Cached something other than a call to @llvm.assume!");
+ assert(AssumptionSet.insert(VH).second &&
+ "Cache contains multiple copies of a call!");
+ }
+#endif
+}
+
+void AssumptionCacheTracker::FunctionCallbackVH::deleted() {
+ auto I = ACT->AssumptionCaches.find_as(cast<Function>(getValPtr()));
+ if (I != ACT->AssumptionCaches.end())
+ ACT->AssumptionCaches.erase(I);
+ // 'this' now dangles!
+}
+
+AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
+ // We probe the function map twice to try and avoid creating a value handle
+ // around the function in common cases. This makes insertion a bit slower,
+ // but if we have to insert we're going to scan the whole function so that
+ // shouldn't matter.
+ auto I = AssumptionCaches.find_as(&F);
+ if (I != AssumptionCaches.end())
+ return *I->second;
+
+ // Ok, build a new cache by scanning the function, insert it and the value
+ // handle into our map, and return the newly populated cache.
+ auto IP = AssumptionCaches.insert(std::make_pair(
+ FunctionCallbackVH(&F, this), llvm::make_unique<AssumptionCache>(F)));
+ assert(IP.second && "Scanning function already in the map?");
+ return *IP.first->second;
+}
+
+void AssumptionCacheTracker::verifyAnalysis() const {
+#ifndef NDEBUG
+ SmallPtrSet<const CallInst *, 4> AssumptionSet;
+ for (const auto &I : AssumptionCaches) {
+ for (auto &VH : I.second->assumptions())
+ if (VH)
+ AssumptionSet.insert(cast<CallInst>(VH));
+
+ for (const BasicBlock &B : cast<Function>(*I.first))
+ for (const Instruction &II : B)
+ if (match(&II, m_Intrinsic<Intrinsic::assume>()))
+ assert(AssumptionSet.count(cast<CallInst>(&II)) &&
+ "Assumption in scanned function not in cache");
+ }
+#endif
+}
+
+AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) {
+ initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry());
+}
+
+AssumptionCacheTracker::~AssumptionCacheTracker() {}
+
+INITIALIZE_PASS(AssumptionCacheTracker, "assumption-cache-tracker",
+ "Assumption Cache Tracker", false, true)
+char AssumptionCacheTracker::ID = 0;
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 38ec52d6b9ac..a9efc5a9f734 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -194,7 +195,8 @@ namespace {
/// represented in the result.
static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
ExtensionKind &Extension,
- const DataLayout &DL, unsigned Depth) {
+ const DataLayout &DL, unsigned Depth,
+ AssumptionCache *AC, DominatorTree *DT) {
assert(V->getType()->isIntegerTy() && "Not an integer value");
// Limit our recursion depth.
@@ -204,6 +206,14 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
return V;
}
+ if (ConstantInt *Const = dyn_cast<ConstantInt>(V)) {
+ // if it's a constant, just convert it to an offset
+ // and remove the variable.
+ Offset += Const->getValue();
+ assert(Scale == 0 && "Constant values don't have a scale");
+ return V;
+ }
+
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
switch (BOp->getOpcode()) {
@@ -211,23 +221,24 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL))
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0, AC,
+ BOp, DT))
break;
// FALL THROUGH.
case Instruction::Add:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth+1);
+ DL, Depth + 1, AC, DT);
Offset += RHSC->getValue();
return V;
case Instruction::Mul:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth+1);
+ DL, Depth + 1, AC, DT);
Offset *= RHSC->getValue();
Scale *= RHSC->getValue();
return V;
case Instruction::Shl:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
- DL, Depth+1);
+ DL, Depth + 1, AC, DT);
Offset <<= RHSC->getValue().getLimitedValue();
Scale <<= RHSC->getValue().getLimitedValue();
return V;
@@ -247,10 +258,13 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Offset = Offset.trunc(SmallWidth);
Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
- Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
- DL, Depth+1);
+ Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL,
+ Depth + 1, AC, DT);
Scale = Scale.zext(OldWidth);
- Offset = Offset.zext(OldWidth);
+
+ // We have to sign-extend even if Extension == EK_ZeroExt as we can't
+ // decompose a sign extension (i.e. zext(x - 1) != zext(x) - zext(-1)).
+ Offset = Offset.sext(OldWidth);
return Result;
}
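// Worked arithmetic for the comment above (annotation, not part of the
// patch), using 8-bit x widened to 16 bits, with x == 0:
//
//   zext16(x - 1)         = zext16(0xFF)    = 0x00FF  (255)
//   zext16(x) - zext16(1) = 0x0000 - 0x0001 = 0xFFFF
//
// The results differ, so a zero extension cannot be distributed over the
// inner subtraction; hence the tracked Offset is sign-extended here.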
@@ -278,7 +292,8 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
static const Value *
DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
SmallVectorImpl<VariableGEPIndex> &VarIndices,
- bool &MaxLookupReached, const DataLayout *DL) {
+ bool &MaxLookupReached, const DataLayout *DL,
+ AssumptionCache *AC, DominatorTree *DT) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = MaxLookupSearchDepth;
MaxLookupReached = false;
@@ -309,7 +324,10 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// If it's not a GEP, hand it off to SimplifyInstruction to see if it
// can come up with something. This matches what GetUnderlyingObject does.
if (const Instruction *I = dyn_cast<Instruction>(V))
- // TODO: Get a DominatorTree and use it here.
+ // TODO: Get a DominatorTree and AssumptionCache and use them here
+ // (these are both now available in this function, but this should be
+ // updated when GetUnderlyingObject is updated). TLI should be
+ // provided also.
if (const Value *Simplified =
SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
V = Simplified;
@@ -368,7 +386,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
- *DL, 0);
+ *DL, 0, AC, DT);
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -449,6 +467,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfo>();
}
@@ -456,8 +475,8 @@ namespace {
assert(AliasCache.empty() && "AliasCache must be cleared after use!");
assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
- AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag,
- LocB.Ptr, LocB.Size, LocB.TBAATag);
+ AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags,
+ LocB.Ptr, LocB.Size, LocB.AATags);
// AliasCache rarely has more than 1 or 2 elements, always use
// shrink_and_clear so it quickly returns to the inline capacity of the
// SmallDenseMap if it ever grows larger.
@@ -471,10 +490,7 @@ namespace {
const Location &Loc) override;
ModRefResult getModRefInfo(ImmutableCallSite CS1,
- ImmutableCallSite CS2) override {
- // The AliasAnalysis base class has some smarts, lets use them.
- return AliasAnalysis::getModRefInfo(CS1, CS2);
- }
+ ImmutableCallSite CS2) override;
/// pointsToConstantMemory - Chase pointers until we find a (constant
/// global) or not.
@@ -544,28 +560,28 @@ namespace {
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
// instruction against another.
AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
- const MDNode *V1TBAAInfo,
+ const AAMDNodes &V1AAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo,
+ const AAMDNodes &V2AAInfo,
const Value *UnderlyingV1, const Value *UnderlyingV2);
// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
// instruction against another.
AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
- const MDNode *PNTBAAInfo,
+ const AAMDNodes &PNAAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo);
+ const AAMDNodes &V2AAInfo);
/// aliasSelect - Disambiguate a Select instruction against another value.
AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
- const MDNode *SITBAAInfo,
+ const AAMDNodes &SIAAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo);
+ const AAMDNodes &V2AAInfo);
AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
- const MDNode *V1TBAATag,
+ AAMDNodes V1AATag,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAATag);
+ AAMDNodes V2AATag);
};
} // End of anonymous namespace
@@ -574,6 +590,7 @@ char BasicAliasAnalysis::ID = 0;
INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
@@ -596,7 +613,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
Worklist.push_back(Loc.Ptr);
do {
const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
- if (!Visited.insert(V)) {
+ if (!Visited.insert(V).second) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
@@ -788,6 +805,14 @@ BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
return Loc;
}
+static bool isAssumeIntrinsic(ImmutableCallSite CS) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II && II->getIntrinsicID() == Intrinsic::assume)
+ return true;
+
+ return false;
+}
+
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object. Since we only look at local properties of this
/// function, we really can't say much about this query. We do, however, use
@@ -840,10 +865,29 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
return NoModRef;
}
+ // While the assume intrinsic is marked as arbitrarily writing so that
+ // proper control dependencies will be maintained, it never aliases any
+ // particular memory location.
+ if (isAssumeIntrinsic(CS))
+ return NoModRef;
+
// The AliasAnalysis base class has some smarts, let's use them.
return AliasAnalysis::getModRefInfo(CS, Loc);
}
+AliasAnalysis::ModRefResult
+BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // While the assume intrinsic is marked as arbitrarily writing so that
+ // proper control dependencies will be maintained, it never aliases any
+ // particular memory location.
+ if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2))
+ return NoModRef;
+
+ // The AliasAnalysis base class has some smarts, let's use them.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
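// An illustrative consequence (annotation, not part of the patch):
//
//   call void @llvm.assume(i1 %cond)
//   %v = load i32* %p
//
// The assume is still modeled as writing memory so its control dependence is
// preserved, but BasicAA now answers NoModRef for queries against it, so the
// load above can be analyzed and moved as if the call were absent.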
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
@@ -851,30 +895,50 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
///
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
- const MDNode *V1TBAAInfo,
+ const AAMDNodes &V1AAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo,
+ const AAMDNodes &V2AAInfo,
const Value *UnderlyingV1,
const Value *UnderlyingV2) {
int64_t GEP1BaseOffset;
bool GEP1MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
+ // We have to get two AssumptionCaches here because GEP1 and V2 may be from
+ // different functions.
+ // FIXME: This really doesn't make any sense. We get a dominator tree below
+ // that can only refer to a single function. But this function (aliasGEP) is
+ // a method on an immutable pass that can be called when there *isn't*
+ // a single function. The old pass management layer makes this "work", but
+ // this isn't really a clean solution.
+ AssumptionCacheTracker &ACT = getAnalysis<AssumptionCacheTracker>();
+ AssumptionCache *AC1 = nullptr, *AC2 = nullptr;
+ if (auto *GEP1I = dyn_cast<Instruction>(GEP1))
+ AC1 = &ACT.getAssumptionCache(
+ const_cast<Function &>(*GEP1I->getParent()->getParent()));
+ if (auto *I2 = dyn_cast<Instruction>(V2))
+ AC2 = &ACT.getAssumptionCache(
+ const_cast<Function &>(*I2->getParent()->getParent()));
+
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+
// If we have two gep instructions with must-alias or not-alias'ing base
// pointers, figure out if the indexes to the GEP tell us anything about the
// derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
// Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, nullptr,
- UnderlyingV2, UnknownSize, nullptr);
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, AAMDNodes(),
+ UnderlyingV2, UnknownSize, AAMDNodes());
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
if ((BaseAlias == MayAlias) && V1Size == V2Size) {
// Do the base pointers alias assuming type and size.
AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
- V1TBAAInfo, UnderlyingV2,
- V2Size, V2TBAAInfo);
+ V1AAInfo, UnderlyingV2,
+ V2Size, V2AAInfo);
if (PreciseBaseAlias == NoAlias) {
// See if the computed offset from the common pointer tells us about the
// relation of the resulting pointer.
@@ -882,11 +946,11 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
- DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL);
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+ GEP2MaxLookupReached, DL, AC2, DT);
const Value *GEP1BasePtr =
- DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL);
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+ GEP1MaxLookupReached, DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -914,15 +978,15 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// exactly, see if the computed offset from the common pointer tells us
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
- DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL);
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+ GEP1MaxLookupReached, DL, AC1, DT);
int64_t GEP2BaseOffset;
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
- DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL);
+ DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+ GEP2MaxLookupReached, DL, AC2, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -949,8 +1013,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
if (V1Size == UnknownSize && V2Size == UnknownSize)
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, nullptr,
- V2, V2Size, V2TBAAInfo);
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, AAMDNodes(),
+ V2, V2Size, V2AAInfo);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -960,8 +1024,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return R;
const Value *GEP1BasePtr =
- DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL);
+ DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+ GEP1MaxLookupReached, DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -1012,12 +1076,43 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
}
}
- // Try to distinguish something like &A[i][1] against &A[42][0].
- // Grab the least significant bit set in any of the scales.
if (!GEP1VariableIndices.empty()) {
uint64_t Modulo = 0;
- for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
- Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+ bool AllPositive = true;
+ for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {
+
+ // Try to distinguish something like &A[i][1] against &A[42][0].
+ // Grab the least significant bit set in any of the scales. We
+ // don't need std::abs here (even if the scale's negative) as we'll
+ // be ^'ing Modulo with itself later.
+ Modulo |= (uint64_t) GEP1VariableIndices[i].Scale;
+
+ if (AllPositive) {
+ // If the Value could change between cycles, then any reasoning about
+ // the Value this cycle may not hold in the next cycle. We'll just
+ // give up if we can't determine conditions that hold for every cycle:
+ const Value *V = GEP1VariableIndices[i].V;
+
+ bool SignKnownZero, SignKnownOne;
+ ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+ 0, AC1, nullptr, DT);
+
+ // Zero-extension widens the variable, and so forces the sign
+ // bit to zero.
+ bool IsZExt = GEP1VariableIndices[i].Extension == EK_ZeroExt;
+ SignKnownZero |= IsZExt;
+ SignKnownOne &= !IsZExt;
+
+ // If the variable begins with a zero then we know it's
+ // positive, regardless of whether the value is signed or
+ // unsigned.
+ int64_t Scale = GEP1VariableIndices[i].Scale;
+ AllPositive =
+ (SignKnownZero && Scale >= 0) ||
+ (SignKnownOne && Scale < 0);
+ }
+ }
+
Modulo = Modulo ^ (Modulo & (Modulo - 1));
// We can compute the difference between the two addresses
@@ -1027,6 +1122,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
if (V1Size != UnknownSize && V2Size != UnknownSize &&
ModOffset >= V2Size && V1Size <= Modulo - ModOffset)
return NoAlias;
+
+ // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
+ // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
+ // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
+ if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t) GEP1BaseOffset)
+ return NoAlias;
}
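// Worked example for the two rules above (annotation, not part of the
// patch). For &A[i][1] vs. &A[42][0] over [2 x i32] rows: the variable index
// contributes i*8, so Modulo == 8; the constant offsets land 4 bytes apart
// within that 8-byte stride, so two 4-byte accesses get NoAlias. For the new
// rule: with every variable index known non-negative and GEP1BaseOffset == 8,
// GEP1 points at least 8 bytes past the common base, so a 4-byte access at
// V2 (the base itself) cannot overlap it.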
// Statically, we can see that the base objects are the same, but the
@@ -1056,33 +1157,33 @@ MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) {
/// instruction against another.
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
- const MDNode *SITBAAInfo,
+ const AAMDNodes &SIAAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo) {
+ const AAMDNodes &V2AAInfo) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
if (SI->getCondition() == SI2->getCondition()) {
AliasResult Alias =
- aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo,
- SI2->getTrueValue(), V2Size, V2TBAAInfo);
+ aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
+ SI2->getTrueValue(), V2Size, V2AAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo,
- SI2->getFalseValue(), V2Size, V2TBAAInfo);
+ aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
+ SI2->getFalseValue(), V2Size, V2AAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
AliasResult Alias =
- aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
return MergeAliasResults(ThisAlias, Alias);
}
@@ -1090,9 +1191,9 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize,
// against another.
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
- const MDNode *PNTBAAInfo,
+ const AAMDNodes &PNAAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo) {
+ const AAMDNodes &V2AAInfo) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
@@ -1102,8 +1203,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
// on corresponding edges.
if (const PHINode *PN2 = dyn_cast<PHINode>(V2))
if (PN2->getParent() == PN->getParent()) {
- LocPair Locs(Location(PN, PNSize, PNTBAAInfo),
- Location(V2, V2Size, V2TBAAInfo));
+ LocPair Locs(Location(PN, PNSize, PNAAInfo),
+ Location(V2, V2Size, V2AAInfo));
if (PN > V2)
std::swap(Locs.first, Locs.second);
// Analyse the PHIs' inputs under the assumption that the PHIs are
@@ -1121,9 +1222,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
AliasResult ThisAlias =
- aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
+ aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
- V2Size, V2TBAAInfo);
+ V2Size, V2AAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1146,12 +1247,12 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
// sides are PHI nodes. In which case, this is O(m x n) time where 'm'
// and 'n' are the number of PHI sources.
return MayAlias;
- if (UniqueSrc.insert(PV1))
+ if (UniqueSrc.insert(PV1).second)
V1Srcs.push_back(PV1);
}
- AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo,
- V1Srcs[0], PNSize, PNTBAAInfo);
+ AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo,
+ V1Srcs[0], PNSize, PNAAInfo);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
if (Alias == MayAlias)
@@ -1162,8 +1263,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
Value *V = V1Srcs[i];
- AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo,
- V, PNSize, PNTBAAInfo);
+ AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo,
+ V, PNSize, PNAAInfo);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1177,9 +1278,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
//
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
- const MDNode *V1TBAAInfo,
+ AAMDNodes V1AAInfo,
const Value *V2, uint64_t V2Size,
- const MDNode *V2TBAAInfo) {
+ AAMDNodes V2AAInfo) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size == 0 || V2Size == 0)
@@ -1259,8 +1360,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
- LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
- Location(V2, V2Size, V2TBAAInfo));
+ LocPair Locs(Location(V1, V1Size, V1AAInfo),
+ Location(V2, V2Size, V2AAInfo));
if (V1 > V2)
std::swap(Locs.first, Locs.second);
std::pair<AliasCacheTy::iterator, bool> Pair =
@@ -1274,32 +1375,32 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
std::swap(V1, V2);
std::swap(V1Size, V2Size);
std::swap(O1, O2);
- std::swap(V1TBAAInfo, V2TBAAInfo);
+ std::swap(V1AAInfo, V2AAInfo);
}
if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
- AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2);
+ AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
if (Result != MayAlias) return AliasCache[Locs] = Result;
}
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
- std::swap(V1TBAAInfo, V2TBAAInfo);
+ std::swap(V1AAInfo, V2AAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
- AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
- V2, V2Size, V2TBAAInfo);
+ AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
+ V2, V2Size, V2AAInfo);
if (Result != MayAlias) return AliasCache[Locs] = Result;
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
std::swap(V1, V2);
std::swap(V1Size, V2Size);
- std::swap(V1TBAAInfo, V2TBAAInfo);
+ std::swap(V1AAInfo, V2AAInfo);
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
- AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
- V2, V2Size, V2TBAAInfo);
+ AliasResult Result = aliasSelect(S1, V1Size, V1AAInfo,
+ V2, V2Size, V2AAInfo);
if (Result != MayAlias) return AliasCache[Locs] = Result;
}
@@ -1312,8 +1413,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
return AliasCache[Locs] = PartialAlias;
AliasResult Result =
- AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
- Location(V2, V2Size, V2TBAAInfo));
+ AliasAnalysis::alias(Location(V1, V1Size, V1AAInfo),
+ Location(V2, V2Size, V2AAInfo));
return AliasCache[Locs] = Result;
}
@@ -1338,10 +1439,8 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
// Make sure that the visited phis cannot reach the Value. This ensures that
// the Values cannot come from different iterations of a potential cycle the
// phi nodes could be involved in.
- for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(),
- PE = VisitedPhiBBs.end();
- PI != PE; ++PI)
- if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI))
+ for (auto *P : VisitedPhiBBs)
+ if (isPotentiallyReachable(P->begin(), Inst, DT, LI))
return false;
return true;
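// An illustration of the cycle check (annotation, not part of the patch):
//
//   loop:
//     %p = phi i32* [ %base, %entry ], [ %q, %loop ]
//     %q = getelementptr i32* %p, i64 1
//     br i1 %c, label %loop, label %exit
//
// %p and %q denote different addresses on different trips around the loop,
// so once a phi's block has been visited, values reachable from it must not
// be assumed equal across the back-edge.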
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 723332d480f4..278073cd6c52 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -614,7 +614,8 @@ static void findIrreducibleHeaders(
break;
}
}
- assert(Headers.size() >= 2 && "Should be irreducible");
+ assert(Headers.size() >= 2 &&
+ "Expected irreducible CFG; -loop-info is likely invalid");
if (Headers.size() == InSCC.size()) {
// Every block is a header.
std::sort(Headers.begin(), Headers.end());
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index bbd875059522..2b39d47e5faf 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -196,7 +196,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
SmallVector<uint32_t, 2> Weights;
Weights.reserve(TI->getNumSuccessors());
for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
- ConstantInt *Weight = dyn_cast<ConstantInt>(WeightsNode->getOperand(i));
+ ConstantInt *Weight =
+ mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
if (!Weight)
return false;
Weights.push_back(
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 8ef5302717f8..8ecd70b5d716 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
void llvm::FindFunctionBackedges(const Function &F,
SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
const BasicBlock *BB = &F.getEntryBlock();
- if (succ_begin(BB) == succ_end(BB))
+ if (succ_empty(BB))
return;
SmallPtrSet<const BasicBlock*, 8> Visited;
@@ -45,7 +45,7 @@ void llvm::FindFunctionBackedges(const Function &F,
bool FoundNew = false;
while (I != succ_end(ParentBB)) {
BB = *I++;
- if (Visited.insert(BB)) {
+ if (Visited.insert(BB).second) {
FoundNew = true;
break;
}
@@ -141,7 +141,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
SmallSet<const BasicBlock*, 64> Visited;
do {
BasicBlock *BB = Worklist.pop_back_val();
- if (!Visited.insert(BB))
+ if (!Visited.insert(BB).second)
continue;
if (BB == StopBB)
return true;
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index c2c19d6a69ef..89787f826bec 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -79,11 +79,11 @@ namespace {
bool runOnFunction(Function &F) override {
std::string Filename = "cfg." + F.getName().str() + ".dot";
errs() << "Writing '" << Filename << "'...";
-
- std::string ErrorInfo;
- raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text);
- if (ErrorInfo.empty())
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
WriteGraph(File, (const Function*)&F);
else
errs() << " error opening file for writing!";
@@ -114,10 +114,10 @@ namespace {
std::string Filename = "cfg." + F.getName().str() + ".dot";
errs() << "Writing '" << Filename << "'...";
- std::string ErrorInfo;
- raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text);
-
- if (ErrorInfo.empty())
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
WriteGraph(File, (const Function*)&F, true);
else
errs() << " error opening file for writing!";
diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp
new file mode 100644
index 000000000000..88bb84a80020
--- /dev/null
+++ b/lib/Analysis/CFLAliasAnalysis.cpp
@@ -0,0 +1,1013 @@
+//===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a CFL-based context-insensitive alias analysis
+// algorithm. It does not depend on types. The algorithm is a mixture of the one
+// described in "Demand-driven alias analysis for C" by Xin Zheng and Radu
+// Rugina, and "Fast algorithms for Dyck-CFL-reachability with applications to
+// Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the
+// papers, we build a graph of the uses of a variable, where each node is a
+// memory location, and each edge is an action that happened on that memory
+// location. The "actions" can be one of Dereference, Reference, or Assign.
+//
+// Two variables are considered as aliasing iff you can reach one value's node
+// from the other value's node and the language formed by concatenating all of
+// the edge labels (actions) conforms to a context-free grammar.
+//
+// Because this algorithm requires a graph search on each query, we execute the
+// algorithm outlined in "Fast algorithms..." (mentioned above)
+// in order to transform the graph into sets of variables that may alias in
+// ~nlogn time (n = number of variables), which makes queries take constant
+// time.
+//===----------------------------------------------------------------------===//
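// A small illustration of the graph (annotation, not part of the patch):
// for
//
//   %p = alloca i32
//   %v = load i32* %p
//
// the builder adds %v -Dereference-> %p and the inverse %p -Reference-> %v;
// whether two values may alias is then a question of whether some path
// between their nodes spells a word in the Dyck (matched-bracket) language
// over these edge labels.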
+
+#include "StratifiedSets.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <forward_list>
+#include <tuple>
+
+using namespace llvm;
+
+// Try to go from a Value* to a Function*. Never returns nullptr.
+static Optional<Function *> parentFunctionOfValue(Value *);
+
+// Returns possible functions called by the Inst* into the given
+// SmallVectorImpl. Returns true if targets found, false otherwise.
+// This is templated because InvokeInst/CallInst give us the same
+// set of functions that we care about, and I don't like repeating
+// myself.
+template <typename Inst>
+static bool getPossibleTargets(Inst *, SmallVectorImpl<Function *> &);
+
+// Some instructions need to have their users tracked. Instructions like
+// `add` require you to get the users of the Instruction* itself, other
+// instructions like `store` require you to get the users of the first
+// operand. This function gets the "proper" value to track for each
+// type of instruction we support.
+static Optional<Value *> getTargetValue(Instruction *);
+
+// There are certain instructions (e.g. FenceInst) that we ignore.
+// This notes that we should ignore those.
+static bool hasUsefulEdges(Instruction *);
+
+const StratifiedIndex StratifiedLink::SetSentinel =
+ std::numeric_limits<StratifiedIndex>::max();
+
+namespace {
+// StratifiedInfo Attribute things.
+typedef unsigned StratifiedAttr;
+LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs;
+LLVM_CONSTEXPR unsigned AttrAllIndex = 0;
+LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1;
+LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 2;
+LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex;
+LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
+
+LLVM_CONSTEXPR StratifiedAttr AttrNone = 0;
+LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone;
+
+// \brief StratifiedSets call for knowledge of "direction", so this is how we
+// represent that locally.
+enum class Level { Same, Above, Below };
+
+// \brief Edges can be one of four "weights" -- each weight must have an inverse
+// weight (Assign has Assign; Reference has Dereference).
+enum class EdgeType {
+ // The weight assigned when assigning from or to a value. For example, in:
+ // %b = getelementptr %a, 0
+ // ...The relationships are %b assign %a, and %a assign %b. This used to be
+ // two edges, but having a distinction bought us nothing.
+ Assign,
+
+ // The edge used when we have an edge going from some handle to a Value.
+ // Examples of this include:
+ // %b = load %a (%b Dereference %a)
+ // %b = extractelement %a, 0 (%a Dereference %b)
+ Dereference,
+
+ // The edge used when our edge goes from a value to a handle that may have
+ // contained it at some point. Examples:
+ // %b = load %a (%a Reference %b)
+ // %b = extractelement %a, 0 (%b Reference %a)
+ Reference
+};
+
+// \brief Encodes the notion of a "use"
+struct Edge {
+ // \brief Which value the edge is coming from
+ Value *From;
+
+ // \brief Which value the edge is pointing to
+ Value *To;
+
+ // \brief Edge weight
+ EdgeType Weight;
+
+ // \brief Whether we aliased any external values along the way that may be
+  // invisible to the analysis (e.g. landingpad for exceptions, calls for
+ // interprocedural analysis, etc.)
+ StratifiedAttrs AdditionalAttrs;
+
+ Edge(Value *From, Value *To, EdgeType W, StratifiedAttrs A)
+ : From(From), To(To), Weight(W), AdditionalAttrs(A) {}
+};
+
+// \brief Information we have about a function and would like to keep around
+struct FunctionInfo {
+ StratifiedSets<Value *> Sets;
+ // Lots of functions have < 4 returns. Adjust as necessary.
+ SmallVector<Value *, 4> ReturnedValues;
+
+ FunctionInfo(StratifiedSets<Value *> &&S,
+ SmallVector<Value *, 4> &&RV)
+ : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
+};
+
+struct CFLAliasAnalysis;
+
+struct FunctionHandle : public CallbackVH {
+ FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA)
+ : CallbackVH(Fn), CFLAA(CFLAA) {
+ assert(Fn != nullptr);
+ assert(CFLAA != nullptr);
+ }
+
+ virtual ~FunctionHandle() {}
+
+ void deleted() override { removeSelfFromCache(); }
+ void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
+
+private:
+ CFLAliasAnalysis *CFLAA;
+
+ void removeSelfFromCache();
+};
+
+struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis {
+private:
+ /// \brief Cached mapping of Functions to their StratifiedSets.
+ /// If a function's sets are currently being built, it is marked
+ /// in the cache as an Optional without a value. This way, if we
+ /// have any kind of recursion, it is discernable from a function
+ /// that simply has empty sets.
+ DenseMap<Function *, Optional<FunctionInfo>> Cache;
+ std::forward_list<FunctionHandle> Handles;
+
+public:
+ static char ID;
+
+ CFLAliasAnalysis() : ImmutablePass(ID) {
+ initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual ~CFLAliasAnalysis() {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AliasAnalysis::getAnalysisUsage(AU);
+ }
+
+ void *getAdjustedAnalysisPointer(const void *ID) override {
+ if (ID == &AliasAnalysis::ID)
+ return (AliasAnalysis *)this;
+ return this;
+ }
+
+ /// \brief Inserts the given Function into the cache.
+ void scan(Function *Fn);
+
+ void evict(Function *Fn) { Cache.erase(Fn); }
+
+ /// \brief Ensures that the given function is available in the cache.
+ /// Returns the appropriate entry from the cache.
+ const Optional<FunctionInfo> &ensureCached(Function *Fn) {
+ auto Iter = Cache.find(Fn);
+ if (Iter == Cache.end()) {
+ scan(Fn);
+ Iter = Cache.find(Fn);
+ assert(Iter != Cache.end());
+ assert(Iter->second.hasValue());
+ }
+ return Iter->second;
+ }
+
+ AliasResult query(const Location &LocA, const Location &LocB);
+
+ AliasResult alias(const Location &LocA, const Location &LocB) override {
+ if (LocA.Ptr == LocB.Ptr) {
+ if (LocA.Size == LocB.Size) {
+ return MustAlias;
+ } else {
+ return PartialAlias;
+ }
+ }
+
+ // Comparisons between global variables and other constants should be
+ // handled by BasicAA.
+ if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
+ return MayAlias;
+ }
+
+ return query(LocA, LocB);
+ }
+
+ void initializePass() override { InitializeAliasAnalysis(this); }
+};
+
+void FunctionHandle::removeSelfFromCache() {
+ assert(CFLAA != nullptr);
+ auto *Val = getValPtr();
+ CFLAA->evict(cast<Function>(Val));
+ setValPtr(nullptr);
+}
+
+// \brief Gets the edges our graph should have, based on an Instruction*
+class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
+ CFLAliasAnalysis &AA;
+ SmallVectorImpl<Edge> &Output;
+
+public:
+ GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl<Edge> &Output)
+ : AA(AA), Output(Output) {}
+
+ void visitInstruction(Instruction &) {
+ llvm_unreachable("Unsupported instruction encountered");
+ }
+
+ void visitCastInst(CastInst &Inst) {
+ Output.push_back(Edge(&Inst, Inst.getOperand(0), EdgeType::Assign,
+ AttrNone));
+ }
+
+ void visitBinaryOperator(BinaryOperator &Inst) {
+ auto *Op1 = Inst.getOperand(0);
+ auto *Op2 = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, Op1, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, Op2, EdgeType::Assign, AttrNone));
+ }
+
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = Inst.getNewValOperand();
+ Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitAtomicRMWInst(AtomicRMWInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = Inst.getValOperand();
+ Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitPHINode(PHINode &Inst) {
+ for (unsigned I = 0, E = Inst.getNumIncomingValues(); I != E; ++I) {
+ Value *Val = Inst.getIncomingValue(I);
+ Output.push_back(Edge(&Inst, Val, EdgeType::Assign, AttrNone));
+ }
+ }
+
+ void visitGetElementPtrInst(GetElementPtrInst &Inst) {
+ auto *Op = Inst.getPointerOperand();
+ Output.push_back(Edge(&Inst, Op, EdgeType::Assign, AttrNone));
+ for (auto I = Inst.idx_begin(), E = Inst.idx_end(); I != E; ++I)
+ Output.push_back(Edge(&Inst, *I, EdgeType::Assign, AttrNone));
+ }
+
+ void visitSelectInst(SelectInst &Inst) {
+ auto *Condition = Inst.getCondition();
+ Output.push_back(Edge(&Inst, Condition, EdgeType::Assign, AttrNone));
+ auto *TrueVal = Inst.getTrueValue();
+ Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone));
+ auto *FalseVal = Inst.getFalseValue();
+ Output.push_back(Edge(&Inst, FalseVal, EdgeType::Assign, AttrNone));
+ }
+
+ void visitAllocaInst(AllocaInst &) {}
+
+ void visitLoadInst(LoadInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = &Inst;
+ Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone));
+ }
+
+ void visitStoreInst(StoreInst &Inst) {
+ auto *Ptr = Inst.getPointerOperand();
+ auto *Val = Inst.getValueOperand();
+ Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitVAArgInst(VAArgInst &Inst) {
+ // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it does
+ // two things:
+ // 1. Loads a value from *((T*)*Ptr).
+ // 2. Increments (stores to) *Ptr by some target-specific amount.
+ // For now, we'll handle this like a landingpad instruction (by placing the
+ // result in its own group, and having that group alias externals).
+ auto *Val = &Inst;
+ Output.push_back(Edge(Val, Val, EdgeType::Assign, AttrAll));
+ }
+
+ static bool isFunctionExternal(Function *Fn) {
+ return Fn->isDeclaration() || !Fn->hasLocalLinkage();
+ }
+
+  // Gets whether the sets at Index1 are above, below, or equal to the sets
+  // at Index2. Returns None if they are not in the same set chain.
+ static Optional<Level> getIndexRelation(const StratifiedSets<Value *> &Sets,
+ StratifiedIndex Index1,
+ StratifiedIndex Index2) {
+ if (Index1 == Index2)
+ return Level::Same;
+
+ const auto *Current = &Sets.getLink(Index1);
+ while (Current->hasBelow()) {
+ if (Current->Below == Index2)
+ return Level::Below;
+ Current = &Sets.getLink(Current->Below);
+ }
+
+ Current = &Sets.getLink(Index1);
+ while (Current->hasAbove()) {
+ if (Current->Above == Index2)
+ return Level::Above;
+ Current = &Sets.getLink(Current->Above);
+ }
+
+ return NoneType();
+ }
+
+ bool
+ tryInterproceduralAnalysis(const SmallVectorImpl<Function *> &Fns,
+ Value *FuncValue,
+ const iterator_range<User::op_iterator> &Args) {
+ const unsigned ExpectedMaxArgs = 8;
+ const unsigned MaxSupportedArgs = 50;
+ assert(Fns.size() > 0);
+
+    // This bounds the time taken by interprocedural analysis. Keep in mind
+    // that the argument-matching algorithm below is O(n^2) in the number of
+    // arguments.
+ if (std::distance(Args.begin(), Args.end()) > (int) MaxSupportedArgs)
+ return false;
+
+ // Exit early if we'll fail anyway
+ for (auto *Fn : Fns) {
+ if (isFunctionExternal(Fn) || Fn->isVarArg())
+ return false;
+ auto &MaybeInfo = AA.ensureCached(Fn);
+ if (!MaybeInfo.hasValue())
+ return false;
+ }
+
+ SmallVector<Value *, ExpectedMaxArgs> Arguments(Args.begin(), Args.end());
+ SmallVector<StratifiedInfo, ExpectedMaxArgs> Parameters;
+ for (auto *Fn : Fns) {
+ auto &Info = *AA.ensureCached(Fn);
+ auto &Sets = Info.Sets;
+ auto &RetVals = Info.ReturnedValues;
+
+ Parameters.clear();
+ for (auto &Param : Fn->args()) {
+ auto MaybeInfo = Sets.find(&Param);
+ // Did a new parameter somehow get added to the function/slip by?
+ if (!MaybeInfo.hasValue())
+ return false;
+ Parameters.push_back(*MaybeInfo);
+ }
+
+ // Adding an edge from argument -> return value for each parameter that
+ // may alias the return value
+ for (unsigned I = 0, E = Parameters.size(); I != E; ++I) {
+ auto &ParamInfo = Parameters[I];
+ auto &ArgVal = Arguments[I];
+ bool AddEdge = false;
+ StratifiedAttrs Externals;
+ for (unsigned X = 0, XE = RetVals.size(); X != XE; ++X) {
+ auto MaybeInfo = Sets.find(RetVals[X]);
+ if (!MaybeInfo.hasValue())
+ return false;
+
+ auto &RetInfo = *MaybeInfo;
+ auto RetAttrs = Sets.getLink(RetInfo.Index).Attrs;
+ auto ParamAttrs = Sets.getLink(ParamInfo.Index).Attrs;
+ auto MaybeRelation =
+ getIndexRelation(Sets, ParamInfo.Index, RetInfo.Index);
+ if (MaybeRelation.hasValue()) {
+ AddEdge = true;
+ Externals |= RetAttrs | ParamAttrs;
+ }
+ }
+ if (AddEdge)
+ Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign,
+ StratifiedAttrs().flip()));
+ }
+
+ if (Parameters.size() != Arguments.size())
+ return false;
+
+ // Adding edges between arguments for arguments that may end up aliasing
+ // each other. This is necessary for functions such as
+ // void foo(int** a, int** b) { *a = *b; }
+ // (Technically, the proper sets for this would be those below
+ // Arguments[I] and Arguments[X], but our algorithm will produce
+ // extremely similar, and equally correct, results either way)
+ for (unsigned I = 0, E = Arguments.size(); I != E; ++I) {
+ auto &MainVal = Arguments[I];
+ auto &MainInfo = Parameters[I];
+ auto &MainAttrs = Sets.getLink(MainInfo.Index).Attrs;
+ for (unsigned X = I + 1; X != E; ++X) {
+ auto &SubInfo = Parameters[X];
+ auto &SubVal = Arguments[X];
+ auto &SubAttrs = Sets.getLink(SubInfo.Index).Attrs;
+ auto MaybeRelation =
+ getIndexRelation(Sets, MainInfo.Index, SubInfo.Index);
+
+ if (!MaybeRelation.hasValue())
+ continue;
+
+ auto NewAttrs = SubAttrs | MainAttrs;
+ Output.push_back(Edge(MainVal, SubVal, EdgeType::Assign, NewAttrs));
+ }
+ }
+ }
+ return true;
+ }
+
+ template <typename InstT> void visitCallLikeInst(InstT &Inst) {
+ SmallVector<Function *, 4> Targets;
+ if (getPossibleTargets(&Inst, Targets)) {
+ if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands()))
+ return;
+ // Cleanup from interprocedural analysis
+ Output.clear();
+ }
+
+ for (Value *V : Inst.arg_operands())
+ Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll));
+ }
+
+ void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); }
+
+ void visitInvokeInst(InvokeInst &Inst) { visitCallLikeInst(Inst); }
+
+ // Because vectors/aggregates are immutable and unaddressable,
+ // there's nothing we can do to coax a value out of them, other
+ // than calling Extract{Element,Value}. We can effectively treat
+ // them as pointers to arbitrary memory locations we can store in
+ // and load from.
+ void visitExtractElementInst(ExtractElementInst &Inst) {
+ auto *Ptr = Inst.getVectorOperand();
+ auto *Val = &Inst;
+ Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone));
+ }
+
+ void visitInsertElementInst(InsertElementInst &Inst) {
+ auto *Vec = Inst.getOperand(0);
+ auto *Val = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, Vec, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitLandingPadInst(LandingPadInst &Inst) {
+ // Exceptions come from "nowhere", from our analysis' perspective.
+    // So we place the instruction in its own group, noting that said group may
+ // alias externals
+ Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll));
+ }
+
+ void visitInsertValueInst(InsertValueInst &Inst) {
+ auto *Agg = Inst.getOperand(0);
+ auto *Val = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, Agg, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone));
+ }
+
+ void visitExtractValueInst(ExtractValueInst &Inst) {
+ auto *Ptr = Inst.getAggregateOperand();
+ Output.push_back(Edge(&Inst, Ptr, EdgeType::Reference, AttrNone));
+ }
+
+ void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
+ auto *From1 = Inst.getOperand(0);
+ auto *From2 = Inst.getOperand(1);
+ Output.push_back(Edge(&Inst, From1, EdgeType::Assign, AttrNone));
+ Output.push_back(Edge(&Inst, From2, EdgeType::Assign, AttrNone));
+ }
+};
+
+// For a given instruction, we need to know which Value* to get the
+// users of in order to build our graph. In some cases (e.g. add),
+// we simply need the Instruction*. In other cases (e.g. store),
+// finding the users of the Instruction* is useless; we need to find
+// the users of the first operand. This handles determining which
+// value to follow for us.
+//
+// Note: we *need* to keep this in sync with GetEdgesVisitor. Add
+// something to GetEdgesVisitor, add it here -- remove something from
+// GetEdgesVisitor, remove it here.
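+//
+// For example (illustrative only), for `store i32 %v, i32* %p` we follow the
+// users of %p -- the pointer operand -- because the store itself produces no
+// value whose users we could chase.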
+class GetTargetValueVisitor
+ : public InstVisitor<GetTargetValueVisitor, Value *> {
+public:
+ Value *visitInstruction(Instruction &Inst) { return &Inst; }
+
+ Value *visitStoreInst(StoreInst &Inst) { return Inst.getPointerOperand(); }
+
+ Value *visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
+ return Inst.getPointerOperand();
+ }
+
+ Value *visitAtomicRMWInst(AtomicRMWInst &Inst) {
+ return Inst.getPointerOperand();
+ }
+
+ Value *visitInsertElementInst(InsertElementInst &Inst) {
+ return Inst.getOperand(0);
+ }
+
+ Value *visitInsertValueInst(InsertValueInst &Inst) {
+ return Inst.getAggregateOperand();
+ }
+};
+
+// Set building requires a weighted bidirectional graph.
+template <typename EdgeTypeT> class WeightedBidirectionalGraph {
+public:
+ typedef std::size_t Node;
+
+private:
+ const static Node StartNode = Node(0);
+
+ struct Edge {
+ EdgeTypeT Weight;
+ Node Other;
+
+ Edge(const EdgeTypeT &W, const Node &N)
+ : Weight(W), Other(N) {}
+
+ bool operator==(const Edge &E) const {
+ return Weight == E.Weight && Other == E.Other;
+ }
+
+ bool operator!=(const Edge &E) const { return !operator==(E); }
+ };
+
+ struct NodeImpl {
+ std::vector<Edge> Edges;
+ };
+
+ std::vector<NodeImpl> NodeImpls;
+
+ bool inbounds(Node NodeIndex) const { return NodeIndex < NodeImpls.size(); }
+
+ const NodeImpl &getNode(Node N) const { return NodeImpls[N]; }
+ NodeImpl &getNode(Node N) { return NodeImpls[N]; }
+
+public:
+ // ----- Various Edge iterators for the graph ----- //
+
+ // \brief Iterator for edges. Because this graph is bidirected, we don't
+  // allow modification of the edges using this iterator. Additionally, the
+ // iterator becomes invalid if you add edges to or from the node you're
+ // getting the edges of.
+  struct EdgeIterator : public std::iterator<std::forward_iterator_tag,
+                                             std::tuple<EdgeTypeT, Node>> {
+ EdgeIterator(const typename std::vector<Edge>::const_iterator &Iter)
+ : Current(Iter) {}
+
+  EdgeIterator(NodeImpl &Impl) : Current(Impl.Edges.begin()) {}
+
+ EdgeIterator &operator++() {
+ ++Current;
+ return *this;
+ }
+
+ EdgeIterator operator++(int) {
+ EdgeIterator Copy(Current);
+ operator++();
+ return Copy;
+ }
+
+ std::tuple<EdgeTypeT, Node> &operator*() {
+ Store = std::make_tuple(Current->Weight, Current->Other);
+ return Store;
+ }
+
+ bool operator==(const EdgeIterator &Other) const {
+ return Current == Other.Current;
+ }
+
+ bool operator!=(const EdgeIterator &Other) const {
+ return !operator==(Other);
+ }
+
+ private:
+ typename std::vector<Edge>::const_iterator Current;
+ std::tuple<EdgeTypeT, Node> Store;
+ };
+
+ // Wrapper for EdgeIterator with begin()/end() calls.
+ struct EdgeIterable {
+ EdgeIterable(const std::vector<Edge> &Edges)
+ : BeginIter(Edges.begin()), EndIter(Edges.end()) {}
+
+ EdgeIterator begin() { return EdgeIterator(BeginIter); }
+
+ EdgeIterator end() { return EdgeIterator(EndIter); }
+
+ private:
+ typename std::vector<Edge>::const_iterator BeginIter;
+ typename std::vector<Edge>::const_iterator EndIter;
+ };
+
+ // ----- Actual graph-related things ----- //
+
+ WeightedBidirectionalGraph() {}
+
+ WeightedBidirectionalGraph(WeightedBidirectionalGraph<EdgeTypeT> &&Other)
+ : NodeImpls(std::move(Other.NodeImpls)) {}
+
+ WeightedBidirectionalGraph<EdgeTypeT> &
+ operator=(WeightedBidirectionalGraph<EdgeTypeT> &&Other) {
+ NodeImpls = std::move(Other.NodeImpls);
+ return *this;
+ }
+
+ Node addNode() {
+ auto Index = NodeImpls.size();
+ auto NewNode = Node(Index);
+ NodeImpls.push_back(NodeImpl());
+ return NewNode;
+ }
+
+ void addEdge(Node From, Node To, const EdgeTypeT &Weight,
+ const EdgeTypeT &ReverseWeight) {
+ assert(inbounds(From));
+ assert(inbounds(To));
+ auto &FromNode = getNode(From);
+ auto &ToNode = getNode(To);
+ FromNode.Edges.push_back(Edge(Weight, To));
+ ToNode.Edges.push_back(Edge(ReverseWeight, From));
+ }
+
+ EdgeIterable edgesFor(const Node &N) const {
+ const auto &Node = getNode(N);
+ return EdgeIterable(Node.Edges);
+ }
+
+ bool empty() const { return NodeImpls.empty(); }
+ std::size_t size() const { return NodeImpls.size(); }
+
+ // \brief Gets an arbitrary node in the graph as a starting point for
+ // traversal.
+ Node getEntryNode() {
+ assert(inbounds(StartNode));
+ return StartNode;
+ }
+};
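+
+// A minimal usage sketch (illustrative only, not part of this patch):
+//   WeightedBidirectionalGraph<int> G;
+//   auto A = G.addNode();
+//   auto B = G.addNode();
+//   G.addEdge(A, B, /*Weight=*/1, /*ReverseWeight=*/-1);
+//   for (auto &T : G.edgesFor(A))
+//     ; // std::get<0>(T) is the weight, std::get<1>(T) the neighbor node.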
+
+typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT;
+typedef DenseMap<Value *, GraphT::Node> NodeMapT;
+}
+
+// -- Setting up/registering CFLAA pass -- //
+char CFLAliasAnalysis::ID = 0;
+
+INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa",
+ "CFL-Based AA implementation", false, true, false)
+
+ImmutablePass *llvm::createCFLAliasAnalysisPass() {
+ return new CFLAliasAnalysis();
+}
+
+//===----------------------------------------------------------------------===//
+// Function declarations that require types defined in the namespace above
+//===----------------------------------------------------------------------===//
+
+// Given an argument number, returns the appropriate Attr index to set.
+static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum);
+
+// Given a Value, potentially return which AttrIndex it maps to.
+static Optional<StratifiedAttr> valueToAttrIndex(Value *Val);
+
+// Gets the inverse of a given EdgeType.
+static EdgeType flipWeight(EdgeType);
+
+// Gets edges of the given Instruction*, writing them to the SmallVector*.
+static void argsToEdges(CFLAliasAnalysis &, Instruction *,
+ SmallVectorImpl<Edge> &);
+
+// Gets the "Level" that one should travel in StratifiedSets
+// given an EdgeType.
+static Level directionOfEdgeType(EdgeType);
+
+// Builds the graph needed for constructing the StratifiedSets for the
+// given function
+static void buildGraphFrom(CFLAliasAnalysis &, Function *,
+ SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
+
+// Builds the graph + StratifiedSets for a function.
+static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *);
+
+static Optional<Function *> parentFunctionOfValue(Value *Val) {
+ if (auto *Inst = dyn_cast<Instruction>(Val)) {
+ auto *Bb = Inst->getParent();
+ return Bb->getParent();
+ }
+
+ if (auto *Arg = dyn_cast<Argument>(Val))
+ return Arg->getParent();
+ return NoneType();
+}
+
+template <typename Inst>
+static bool getPossibleTargets(Inst *Call,
+ SmallVectorImpl<Function *> &Output) {
+ if (auto *Fn = Call->getCalledFunction()) {
+ Output.push_back(Fn);
+ return true;
+ }
+
+ // TODO: If the call is indirect, we might be able to enumerate all potential
+ // targets of the call and return them, rather than just failing.
+ return false;
+}
+
+static Optional<Value *> getTargetValue(Instruction *Inst) {
+ GetTargetValueVisitor V;
+ return V.visit(Inst);
+}
+
+static bool hasUsefulEdges(Instruction *Inst) {
+ bool IsNonInvokeTerminator =
+ isa<TerminatorInst>(Inst) && !isa<InvokeInst>(Inst);
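+  // For example, `br` and `ret` are non-invoke terminators and yield no
+  // useful edges, while an `invoke` is kept because it is a call whose
+  // result we still need to model.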
+ return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) && !IsNonInvokeTerminator;
+}
+
+static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) {
+ if (isa<GlobalValue>(Val))
+ return AttrGlobalIndex;
+
+ if (auto *Arg = dyn_cast<Argument>(Val))
+ if (!Arg->hasNoAliasAttr())
+ return argNumberToAttrIndex(Arg->getArgNo());
+ return NoneType();
+}
+
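+// For example, argument 0 maps to bit AttrFirstArgIndex (2), argument 1 to
+// bit 3, and so on; argument numbers greater than AttrMaxNumArgs all collapse
+// to AttrAllIndex.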
+static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) {
+ if (ArgNum > AttrMaxNumArgs)
+ return AttrAllIndex;
+ return ArgNum + AttrFirstArgIndex;
+}
+
+static EdgeType flipWeight(EdgeType Initial) {
+ switch (Initial) {
+ case EdgeType::Assign:
+ return EdgeType::Assign;
+ case EdgeType::Dereference:
+ return EdgeType::Reference;
+ case EdgeType::Reference:
+ return EdgeType::Dereference;
+ }
+ llvm_unreachable("Incomplete coverage of EdgeType enum");
+}
+
+static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
+ SmallVectorImpl<Edge> &Output) {
+ GetEdgesVisitor v(Analysis, Output);
+ v.visit(Inst);
+}
+
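+// In other words, following a Reference edge moves us one level up (Above)
+// in the stratified sets, a Dereference edge moves us one level down (Below),
+// and an Assign edge keeps us on the same level.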
+static Level directionOfEdgeType(EdgeType Weight) {
+ switch (Weight) {
+ case EdgeType::Reference:
+ return Level::Above;
+ case EdgeType::Dereference:
+ return Level::Below;
+ case EdgeType::Assign:
+ return Level::Same;
+ }
+ llvm_unreachable("Incomplete switch coverage");
+}
+
+// Aside: We may remove graph construction entirely, because it doesn't really
+// buy us much that we don't already have. Interprocedural analysis should be
+// added first, however, in case it requires the graph produced here for
+// efficient execution.
+static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
+ const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
+ auto Pair = Map.insert(std::make_pair(Val, GraphT::Node()));
+ auto &Iter = Pair.first;
+ if (Pair.second) {
+ auto NewNode = Graph.addNode();
+ Iter->second = NewNode;
+ }
+ return Iter->second;
+ };
+
+ SmallVector<Edge, 8> Edges;
+ for (auto &Bb : Fn->getBasicBlockList()) {
+ for (auto &Inst : Bb.getInstList()) {
+ // We don't want the edges of most "return" instructions, but we *do* want
+ // to know what can be returned.
+ if (auto *Ret = dyn_cast<ReturnInst>(&Inst))
+ ReturnedValues.push_back(Ret);
+
+ if (!hasUsefulEdges(&Inst))
+ continue;
+
+ Edges.clear();
+ argsToEdges(Analysis, &Inst, Edges);
+
+      // In the case of an unused alloca (or similar), edges may be empty. We
+      // still record that the value exists so we can potentially answer
+      // NoAlias.
+ if (Edges.empty()) {
+ auto MaybeVal = getTargetValue(&Inst);
+ assert(MaybeVal.hasValue());
+ auto *Target = *MaybeVal;
+ findOrInsertNode(Target);
+ continue;
+ }
+
+ for (const Edge &E : Edges) {
+ auto To = findOrInsertNode(E.To);
+ auto From = findOrInsertNode(E.From);
+ auto FlippedWeight = flipWeight(E.Weight);
+ auto Attrs = E.AdditionalAttrs;
+ Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
+ std::make_pair(FlippedWeight, Attrs));
+ }
+ }
+ }
+}
+
+static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
+ NodeMapT Map;
+ GraphT Graph;
+ SmallVector<Value *, 4> ReturnedValues;
+
+ buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph);
+
+ DenseMap<GraphT::Node, Value *> NodeValueMap;
+ NodeValueMap.resize(Map.size());
+ for (const auto &Pair : Map)
+ NodeValueMap.insert(std::make_pair(Pair.second, Pair.first));
+
+ const auto findValueOrDie = [&NodeValueMap](GraphT::Node Node) {
+ auto ValIter = NodeValueMap.find(Node);
+ assert(ValIter != NodeValueMap.end());
+ return ValIter->second;
+ };
+
+ StratifiedSetsBuilder<Value *> Builder;
+
+ SmallVector<GraphT::Node, 16> Worklist;
+ for (auto &Pair : Map) {
+ Worklist.clear();
+
+ auto *Value = Pair.first;
+ Builder.add(Value);
+ auto InitialNode = Pair.second;
+ Worklist.push_back(InitialNode);
+ while (!Worklist.empty()) {
+ auto Node = Worklist.pop_back_val();
+ auto *CurValue = findValueOrDie(Node);
+ if (isa<Constant>(CurValue) && !isa<GlobalValue>(CurValue))
+ continue;
+
+ for (const auto &EdgeTuple : Graph.edgesFor(Node)) {
+ auto Weight = std::get<0>(EdgeTuple);
+ auto Label = Weight.first;
+ auto &OtherNode = std::get<1>(EdgeTuple);
+ auto *OtherValue = findValueOrDie(OtherNode);
+
+ if (isa<Constant>(OtherValue) && !isa<GlobalValue>(OtherValue))
+ continue;
+
+ bool Added;
+ switch (directionOfEdgeType(Label)) {
+ case Level::Above:
+ Added = Builder.addAbove(CurValue, OtherValue);
+ break;
+ case Level::Below:
+ Added = Builder.addBelow(CurValue, OtherValue);
+ break;
+ case Level::Same:
+ Added = Builder.addWith(CurValue, OtherValue);
+ break;
+ }
+
+ if (Added) {
+ auto Aliasing = Weight.second;
+ if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
+ Aliasing.set(*MaybeCurIndex);
+ if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
+ Aliasing.set(*MaybeOtherIndex);
+ Builder.noteAttributes(CurValue, Aliasing);
+ Builder.noteAttributes(OtherValue, Aliasing);
+ Worklist.push_back(OtherNode);
+ }
+ }
+ }
+ }
+
+  // There are times when we end up with parameters not in our graph (e.g. if
+  // a parameter is only used as the condition of a branch). Other bits of
+  // code depend on
+ // things that were present during construction being present in the graph.
+ // So, we add all present arguments here.
+ for (auto &Arg : Fn->args()) {
+ Builder.add(&Arg);
+ }
+
+ return FunctionInfo(Builder.build(), std::move(ReturnedValues));
+}
+
+void CFLAliasAnalysis::scan(Function *Fn) {
+ auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
+ (void)InsertPair;
+ assert(InsertPair.second &&
+ "Trying to scan a function that has already been cached");
+
+ FunctionInfo Info(buildSetsFrom(*this, Fn));
+ Cache[Fn] = std::move(Info);
+ Handles.push_front(FunctionHandle(Fn, this));
+}
+
+AliasAnalysis::AliasResult
+CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
+ const AliasAnalysis::Location &LocB) {
+ auto *ValA = const_cast<Value *>(LocA.Ptr);
+ auto *ValB = const_cast<Value *>(LocB.Ptr);
+
+ Function *Fn = nullptr;
+ auto MaybeFnA = parentFunctionOfValue(ValA);
+ auto MaybeFnB = parentFunctionOfValue(ValB);
+ if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
+ llvm_unreachable("Don't know how to extract the parent function "
+ "from values A or B");
+ }
+
+ if (MaybeFnA.hasValue()) {
+ Fn = *MaybeFnA;
+ assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
+ "Interprocedural queries not supported");
+ } else {
+ Fn = *MaybeFnB;
+ }
+
+ assert(Fn != nullptr);
+ auto &MaybeInfo = ensureCached(Fn);
+ assert(MaybeInfo.hasValue());
+
+ auto &Sets = MaybeInfo->Sets;
+ auto MaybeA = Sets.find(ValA);
+ if (!MaybeA.hasValue())
+ return AliasAnalysis::MayAlias;
+
+ auto MaybeB = Sets.find(ValB);
+ if (!MaybeB.hasValue())
+ return AliasAnalysis::MayAlias;
+
+ auto SetA = *MaybeA;
+ auto SetB = *MaybeB;
+
+ if (SetA.Index == SetB.Index)
+ return AliasAnalysis::PartialAlias;
+
+ auto AttrsA = Sets.getLink(SetA.Index).Attrs;
+ auto AttrsB = Sets.getLink(SetB.Index).Attrs;
+  // Stratified set attributes are used as markers to signify whether a member
+ // of a StratifiedSet (or a member of a set above the current set) has
+ // interacted with either arguments or globals. "Interacted with" meaning
+ // its value may be different depending on the value of an argument or
+ // global. The thought behind this is that, because arguments and globals
+ // may alias each other, if AttrsA and AttrsB have touched args/globals,
+ // we must conservatively say that they alias. However, if at least one of
+ // the sets has no values that could legally be altered by changing the value
+ // of an argument or global, then we don't have to be as conservative.
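+  //
+  // For example (illustrative only), if one set was tainted by an argument
+  // and the other by a global, both masks are non-empty and we answer
+  // MayAlias; if either value only ever touched locals (say, an alloca that
+  // never mixed with arguments or globals), its mask is empty and we can
+  // answer NoAlias.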
+ if (AttrsA.any() && AttrsB.any())
+ return AliasAnalysis::MayAlias;
+
+ return AliasAnalysis::NoAlias;
+}
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index 5d1d8a9c6e06..4a03002e510b 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -13,105 +13,10 @@
using namespace llvm;
-static cl::opt<bool>
-DebugPM("debug-cgscc-pass-manager", cl::Hidden,
- cl::desc("Print CGSCC pass management debugging information"));
-
-PreservedAnalyses CGSCCPassManager::run(LazyCallGraph::SCC *C,
- CGSCCAnalysisManager *AM) {
- PreservedAnalyses PA = PreservedAnalyses::all();
-
- if (DebugPM)
- dbgs() << "Starting CGSCC pass manager run.\n";
-
- for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
- if (DebugPM)
- dbgs() << "Running CGSCC pass: " << Passes[Idx]->name() << "\n";
-
- PreservedAnalyses PassPA = Passes[Idx]->run(C, AM);
- if (AM)
- AM->invalidate(C, PassPA);
- PA.intersect(std::move(PassPA));
- }
-
- if (DebugPM)
- dbgs() << "Finished CGSCC pass manager run.\n";
-
- return PA;
-}
-
-bool CGSCCAnalysisManager::empty() const {
- assert(CGSCCAnalysisResults.empty() == CGSCCAnalysisResultLists.empty() &&
- "The storage and index of analysis results disagree on how many there "
- "are!");
- return CGSCCAnalysisResults.empty();
-}
-
-void CGSCCAnalysisManager::clear() {
- CGSCCAnalysisResults.clear();
- CGSCCAnalysisResultLists.clear();
-}
-
-CGSCCAnalysisManager::ResultConceptT &
-CGSCCAnalysisManager::getResultImpl(void *PassID, LazyCallGraph::SCC *C) {
- CGSCCAnalysisResultMapT::iterator RI;
- bool Inserted;
- std::tie(RI, Inserted) = CGSCCAnalysisResults.insert(std::make_pair(
- std::make_pair(PassID, C), CGSCCAnalysisResultListT::iterator()));
-
- // If we don't have a cached result for this function, look up the pass and
- // run it to produce a result, which we then add to the cache.
- if (Inserted) {
- CGSCCAnalysisResultListT &ResultList = CGSCCAnalysisResultLists[C];
- ResultList.emplace_back(PassID, lookupPass(PassID).run(C, this));
- RI->second = std::prev(ResultList.end());
- }
-
- return *RI->second->second;
-}
-
-CGSCCAnalysisManager::ResultConceptT *
-CGSCCAnalysisManager::getCachedResultImpl(void *PassID,
- LazyCallGraph::SCC *C) const {
- CGSCCAnalysisResultMapT::const_iterator RI =
- CGSCCAnalysisResults.find(std::make_pair(PassID, C));
- return RI == CGSCCAnalysisResults.end() ? nullptr : &*RI->second->second;
-}
-
-void CGSCCAnalysisManager::invalidateImpl(void *PassID, LazyCallGraph::SCC *C) {
- CGSCCAnalysisResultMapT::iterator RI =
- CGSCCAnalysisResults.find(std::make_pair(PassID, C));
- if (RI == CGSCCAnalysisResults.end())
- return;
-
- CGSCCAnalysisResultLists[C].erase(RI->second);
-}
-
-void CGSCCAnalysisManager::invalidateImpl(LazyCallGraph::SCC *C,
- const PreservedAnalyses &PA) {
- // Clear all the invalidated results associated specifically with this
- // function.
- SmallVector<void *, 8> InvalidatedPassIDs;
- CGSCCAnalysisResultListT &ResultsList = CGSCCAnalysisResultLists[C];
- for (CGSCCAnalysisResultListT::iterator I = ResultsList.begin(),
- E = ResultsList.end();
- I != E;)
- if (I->second->invalidate(C, PA)) {
- InvalidatedPassIDs.push_back(I->first);
- I = ResultsList.erase(I);
- } else {
- ++I;
- }
- while (!InvalidatedPassIDs.empty())
- CGSCCAnalysisResults.erase(
- std::make_pair(InvalidatedPassIDs.pop_back_val(), C));
- CGSCCAnalysisResultLists.erase(C);
-}
-
char CGSCCAnalysisManagerModuleProxy::PassID;
CGSCCAnalysisManagerModuleProxy::Result
-CGSCCAnalysisManagerModuleProxy::run(Module *M) {
+CGSCCAnalysisManagerModuleProxy::run(Module &M) {
assert(CGAM->empty() && "CGSCC analyses ran prior to the module proxy!");
return Result(*CGAM);
}
@@ -123,7 +28,7 @@ CGSCCAnalysisManagerModuleProxy::Result::~Result() {
}
bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
- Module *M, const PreservedAnalyses &PA) {
+ Module &M, const PreservedAnalyses &PA) {
// If this proxy isn't marked as preserved, then we can't even invalidate
// individual CGSCC analyses, there may be an invalid set of SCC objects in
// the cache making it impossible to incrementally preserve them.
@@ -140,7 +45,7 @@ char ModuleAnalysisManagerCGSCCProxy::PassID;
char FunctionAnalysisManagerCGSCCProxy::PassID;
FunctionAnalysisManagerCGSCCProxy::Result
-FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC *C) {
+FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C) {
assert(FAM->empty() && "Function analyses ran prior to the CGSCC proxy!");
return Result(*FAM);
}
@@ -152,7 +57,7 @@ FunctionAnalysisManagerCGSCCProxy::Result::~Result() {
}
bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
- LazyCallGraph::SCC *C, const PreservedAnalyses &PA) {
+ LazyCallGraph::SCC &C, const PreservedAnalyses &PA) {
// If this proxy isn't marked as preserved, then we can't even invalidate
// individual function analyses, there may be an invalid set of Function
// objects in the cache making it impossible to incrementally preserve them.
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index d1632fd26ace..fbd7cf59d04b 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -5,12 +5,14 @@ add_llvm_library(LLVMAnalysis
AliasDebugger.cpp
AliasSetTracker.cpp
Analysis.cpp
+ AssumptionCache.cpp
BasicAliasAnalysis.cpp
BlockFrequencyInfo.cpp
BlockFrequencyInfoImpl.cpp
BranchProbabilityInfo.cpp
CFG.cpp
CFGPrinter.cpp
+ CFLAliasAnalysis.cpp
CGSCCPassManager.cpp
CaptureTracking.cpp
CostModel.cpp
@@ -20,6 +22,7 @@ add_llvm_library(LLVMAnalysis
DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
+ FunctionTargetTransformInfo.cpp
IVUsers.cpp
InstCount.cpp
InstructionSimplify.cpp
@@ -53,6 +56,7 @@ add_llvm_library(LLVMAnalysis
TargetTransformInfo.cpp
Trace.cpp
TypeBasedAliasAnalysis.cpp
+ ScopedNoAliasAA.cpp
ValueTracking.cpp
)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index f2f8877af1a2..5a5475417951 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -19,8 +19,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
@@ -239,7 +239,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
if (Count++ >= Threshold)
return Tracker->tooManyUses();
- if (Visited.insert(&UU))
+ if (Visited.insert(&UU).second)
if (Tracker->shouldExplore(&UU))
Worklist.push_back(&UU);
}
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 4c8a093684f6..fa5683c6820d 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -11,23 +11,113 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "code-metrics"
using namespace llvm;
+static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallPtrSet<const Value *, 32> Visited;
+
+ // Make sure that all of the items in WorkSet are in our EphValues set.
+ EphValues.insert(WorkSet.begin(), WorkSet.end());
+
+ // Note: We don't speculate PHIs here, so we'll miss instruction chains kept
+ // alive only by ephemeral values.
+
+ while (!WorkSet.empty()) {
+ const Value *V = WorkSet.front();
+ WorkSet.erase(WorkSet.begin());
+
+ if (!Visited.insert(V).second)
+ continue;
+
+ // If all uses of this value are ephemeral, then so is this value.
+ bool FoundNEUse = false;
+ for (const User *I : V->users())
+ if (!EphValues.count(I)) {
+ FoundNEUse = true;
+ break;
+ }
+
+ if (FoundNEUse)
+ continue;
+
+ EphValues.insert(V);
+ DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
+
+ if (const User *U = dyn_cast<User>(V))
+ for (const Value *J : U->operands()) {
+ if (isSafeToSpeculativelyExecute(J))
+ WorkSet.push_back(J);
+ }
+ }
+}
+
+// Find all ephemeral values.
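+//
+// For example (illustrative only), given:
+//   %cmp = icmp ugt i32 %n, 4
+//   call void @llvm.assume(i1 %cmp)
+// the assume call seeds the worklist, and %cmp is then marked ephemeral
+// because its only user is the (already ephemeral) assume.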
+void CodeMetrics::collectEphemeralValues(
+ const Loop *L, AssumptionCache *AC,
+ SmallPtrSetImpl<const Value *> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ Instruction *I = cast<Instruction>(AssumeVH);
+
+    // Filter out call sites outside of the loop so we don't do a function's
+ // worth of work for each of its loops (and, in the common case, ephemeral
+ // values in the loop are likely due to @llvm.assume calls in the loop).
+ if (!L->contains(I->getParent()))
+ continue;
+
+ WorkSet.push_back(I);
+ }
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
+void CodeMetrics::collectEphemeralValues(
+ const Function *F, AssumptionCache *AC,
+ SmallPtrSetImpl<const Value *> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ Instruction *I = cast<Instruction>(AssumeVH);
+ assert(I->getParent()->getParent() == F &&
+ "Found assumption for the wrong function!");
+ WorkSet.push_back(I);
+ }
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const Value*> &EphValues) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
+ // Skip ephemeral values.
+ if (EphValues.count(II))
+ continue;
+
// Special handling for calls.
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
ImmutableCallSite CS(cast<Instruction>(II));
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 8dc94219027f..fd8f2aee2db5 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -47,15 +47,16 @@ using namespace llvm;
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//
-/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with
-/// DataLayout. This always returns a non-null constant, but it may be a
+/// Constant fold bitcast, symbolically evaluating it with DataLayout.
+/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
static Constant *FoldBitCast(Constant *C, Type *DestTy,
const DataLayout &TD) {
// Catch the obvious splat cases.
if (C->isNullValue() && !DestTy->isX86_MMXTy())
return Constant::getNullValue(DestTy);
- if (C->isAllOnesValue() && !DestTy->isX86_MMXTy())
+ if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() &&
+ !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
return Constant::getAllOnesValue(DestTy);
// Handle a vector->integer cast.
@@ -197,7 +198,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
unsigned Ratio = NumDstElt/NumSrcElt;
- unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+ unsigned DstBitSize = TD.getTypeSizeInBits(DstEltTy);
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
@@ -213,6 +214,15 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+ // Truncate the element to an integer with the same pointer size and
+  // convert the element back to a pointer using an inttoptr.
+ if (DstEltTy->isPointerTy()) {
+ IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
+ Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
+ Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
+ continue;
+ }
+
// Truncate and remember this piece.
Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
}
@@ -222,9 +232,8 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
}
-/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset
-/// from a global, return the global and the constant. Because of
-/// constantexprs, this function is recursive.
+/// If this constant is a constant offset from a global, return the global and
+/// the constant. Because of constantexprs, this function is recursive.
static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
APInt &Offset, const DataLayout &TD) {
// Trivial case, constant is the global.
@@ -264,10 +273,10 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
return true;
}
-/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
-/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the
-/// pointer to copy results into and BytesLeft is the number of bytes left in
-/// the CurPtr buffer. TD is the target data.
+/// Recursive helper to read bits out of global. C is the constant being copied
+/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
+/// results into and BytesLeft is the number of bytes left in
+/// the CurPtr buffer. TD is the target data.
static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
unsigned char *CurPtr, unsigned BytesLeft,
const DataLayout &TD) {
@@ -518,9 +527,8 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
return nullptr;
}
-/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
-/// produce if it is constant and determinable. If this is not determinable,
-/// return null.
+/// Return the value that a load from C would produce if it is constant and
+/// determinable. If this is not determinable, return null.
Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
const DataLayout *TD) {
// First, try the easy cases:
@@ -610,7 +618,7 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
return nullptr;
}
-/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
+/// One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
/// these together. If target data info is available, it is provided as DL,
/// otherwise DL is null.
@@ -667,9 +675,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
return nullptr;
}
-/// CastGEPIndices - If array indices are not pointer-sized integers,
-/// explicitly cast them so that they aren't implicitly casted by the
-/// getelementptr.
+/// If array indices are not pointer-sized integers, explicitly cast them so
+/// that they aren't implicitly casted by the getelementptr.
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
@@ -724,8 +731,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
return Ptr;
}
-/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
-/// constant expression, do so.
+/// If we can symbolically evaluate the GEP constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
@@ -887,7 +893,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Constant Folding public APIs
//===----------------------------------------------------------------------===//
-/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// Try to constant fold the specified instruction.
/// If successful, the constant result is returned, if not, null is returned.
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
@@ -967,7 +973,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
static Constant *
ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
const TargetLibraryInfo *TLI,
- SmallPtrSet<ConstantExpr *, 4> &FoldedOps) {
+ SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
SmallVector<Constant *, 8> Ops;
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e;
++i) {
@@ -975,7 +981,7 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
// Recursively fold the ConstantExpr's operands. If we have already folded
// a ConstantExpr, we don't have to process it again.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
- if (FoldedOps.insert(NewCE))
+ if (FoldedOps.insert(NewCE).second)
NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
}
Ops.push_back(NewC);
@@ -987,7 +993,7 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
}
-/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// Attempt to fold the constant expression
/// using the specified DataLayout. If successful, the constant result is
 /// returned, if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
@@ -997,7 +1003,7 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
}
-/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// Attempt to constant fold an instruction with the
/// specified opcode and operands. If successful, the constant result is
/// returned, if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
@@ -1102,10 +1108,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
}
}
-/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
+/// Attempt to constant fold a compare
/// instruction (icmp/fcmp) with the specified operands. If it fails, it
/// returns a constant expression of the specified operands.
-///
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Ops0, Constant *Ops1,
const DataLayout *TD,
@@ -1192,9 +1197,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
}
-/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
-/// getelementptr constantexpr, return the constant value being addressed by the
-/// constant expression, or null if something is funny and we can't decide.
+/// Given a constant and a getelementptr constantexpr, return the constant value
+/// being addressed by the constant expression, or null if something is funny
+/// and we can't decide.
Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
ConstantExpr *CE) {
if (!CE->getOperand(1)->isNullValue())
@@ -1210,10 +1215,9 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
return C;
}
-/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
-/// indices (with an *implied* zero pointer index that is not in the list),
-/// return the constant value being addressed by a virtual load, or null if
-/// something is funny and we can't decide.
+/// Given a constant and getelementptr indices (with an *implied* zero pointer
+/// index that is not in the list), return the constant value being addressed by
+/// a virtual load, or null if something is funny and we can't decide.
Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
ArrayRef<Constant*> Indices) {
// Loop over all of the operands, tracking down which value we are
@@ -1231,11 +1235,12 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// Constant Folding for Calls
//
-/// canConstantFoldCallTo - Return true if its even possible to fold a call to
-/// the specified function.
+/// Return true if it's even possible to fold a call to the specified function.
bool llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
case Intrinsic::log:
case Intrinsic::log2:
case Intrinsic::log10:
@@ -1321,7 +1326,7 @@ static Constant *GetConstantFoldFPValue(double V, Type *Ty) {
}
namespace {
-/// llvm_fenv_clearexcept - Clear the floating-point exception state.
+/// Clear the floating-point exception state.
static inline void llvm_fenv_clearexcept() {
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
feclearexcept(FE_ALL_EXCEPT);
@@ -1329,7 +1334,7 @@ static inline void llvm_fenv_clearexcept() {
errno = 0;
}
-/// llvm_fenv_testexcept - Test if a floating-point exception was raised.
+/// Test if a floating-point exception was raised.
static inline bool llvm_fenv_testexcept() {
int errno_val = errno;
if (errno_val == ERANGE || errno_val == EDOM)
@@ -1366,14 +1371,13 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
return GetConstantFoldFPValue(V, Ty);
}
-/// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer
-/// conversion of a constant floating point. If roundTowardZero is false, the
-/// default IEEE rounding is used (toward nearest, ties to even). This matches
-/// the behavior of the non-truncating SSE instructions in the default rounding
-/// mode. The desired integer type Ty is used to select how many bits are
-/// available for the result. Returns null if the conversion cannot be
-/// performed, otherwise returns the Constant value resulting from the
-/// conversion.
+/// Attempt to fold an SSE floating point to integer conversion of a constant
+/// floating point. If roundTowardZero is false, the default IEEE rounding is
+/// used (toward nearest, ties to even). This matches the behavior of the
+/// non-truncating SSE instructions in the default rounding mode. The desired
+/// integer type Ty is used to select how many bits are available for the
+/// result. Returns null if the conversion cannot be performed, otherwise
+/// returns the Constant value resulting from the conversion.
static Constant *ConstantFoldConvertToInt(const APFloat &Val,
bool roundTowardZero, Type *Ty) {
// All of these conversion intrinsics form an integer of at most 64bits.
@@ -1520,8 +1524,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
(Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
if (V >= -0.0)
return ConstantFoldFP(sqrt, V, Ty);
- else // Undefined
- return Constant::getNullValue(Ty);
+ else {
+ // Unlike the sqrt definitions in C/C++, POSIX, and IEEE-754 - which
+ // all guarantee or favor returning NaN - the square root of a
+ // negative number is not defined for the LLVM sqrt intrinsic.
+ // This is because the intrinsic should only be emitted in place of
+ // libm's sqrt function when using "no-nans-fp-math".
+ return UndefValue::get(Ty);
+ }
}
break;
case 's':
@@ -1627,6 +1637,19 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
V1.copySign(V2);
return ConstantFP::get(Ty->getContext(), V1);
}
+
+ if (IntrinsicID == Intrinsic::minnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), minnum(C1, C2));
+ }
+
+ if (IntrinsicID == Intrinsic::maxnum) {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ return ConstantFP::get(Ty->getContext(), maxnum(C1, C2));
+ }
+
if (!TLI)
return nullptr;
if (Name == "pow" && TLI->has(LibFunc::pow))
@@ -1762,7 +1785,7 @@ static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
return ConstantVector::get(Result);
}
-/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index d0784f1e678d..092df5c15cde 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -163,16 +163,15 @@ void dumpExampleDependence(raw_ostream &OS, Function *F,
DstI != DstE; ++DstI) {
if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
OS << "da analyze - ";
- if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) {
+ if (auto D = DA->depends(&*SrcI, &*DstI, true)) {
D->dump(OS);
for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
if (D->isSplitable(Level)) {
OS << "da analyze - split level = " << Level;
- OS << ", iteration = " << *DA->getSplitIteration(D, Level);
+ OS << ", iteration = " << *DA->getSplitIteration(*D, Level);
OS << "!\n";
}
}
- delete D;
}
else
OS << "none!\n";
@@ -782,6 +781,25 @@ void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
}
}
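+// Widen the narrower of the two subscripts so that Src and Dst share one
+// integer type. For example (illustrative only), if Src has type i32 and
+// Dst has type i64, Src is sign-extended to i64 before the pair is compared.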
+void DependenceAnalysis::unifySubscriptType(Subscript *Pair) {
+ const SCEV *Src = Pair->Src;
+ const SCEV *Dst = Pair->Dst;
+ IntegerType *SrcTy = dyn_cast<IntegerType>(Src->getType());
+ IntegerType *DstTy = dyn_cast<IntegerType>(Dst->getType());
+ if (SrcTy == nullptr || DstTy == nullptr) {
+    assert(SrcTy == DstTy && "This function only unifies integer types and "
+                             "expects Src and Dst to share the same type "
+                             "otherwise.");
+ return;
+ }
+ if (SrcTy->getBitWidth() > DstTy->getBitWidth()) {
+ // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst).
+ Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy);
+ } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) {
+ // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src).
+ Pair->Src = SE->getSignExtendExpr(Src, DstTy);
+ }
+}
// removeMatchingExtensions - Examines a subscript pair.
// If the source and destination are identically sign (or zero)
@@ -794,9 +812,11 @@ void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) {
(isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
- if (SrcCast->getType() == DstCast->getType()) {
- Pair->Src = SrcCast->getOperand();
- Pair->Dst = DstCast->getOperand();
+ const SCEV *SrcCastOp = SrcCast->getOperand();
+ const SCEV *DstCastOp = DstCast->getOperand();
+ if (SrcCastOp->getType() == DstCastOp->getType()) {
+ Pair->Src = SrcCastOp;
+ Pair->Dst = DstCastOp;
}
}
}
@@ -2957,15 +2977,11 @@ const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
AddRec->getNoWrapFlags());
}
if (SE->isLoopInvariant(AddRec, TargetLoop))
- return SE->getAddRecExpr(AddRec,
- Value,
- TargetLoop,
- SCEV::FlagAnyWrap);
- return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(),
- TargetLoop, Value),
- AddRec->getStepRecurrence(*SE),
- AddRec->getLoop(),
- AddRec->getNoWrapFlags());
+ return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap);
+ return SE->getAddRecExpr(
+ addToCoefficient(AddRec->getStart(), TargetLoop, Value),
+ AddRec->getStepRecurrence(*SE), AddRec->getLoop(),
+ AddRec->getNoWrapFlags());
}
@@ -3183,7 +3199,7 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
const SCEV *DstSCEV,
SmallVectorImpl<Subscript> &Pair,
- const SCEV *ElementSize) const {
+ const SCEV *ElementSize) {
const SCEVUnknown *SrcBase =
dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV));
const SCEVUnknown *DstBase =
@@ -3238,6 +3254,7 @@ bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
for (int i = 0; i < size; ++i) {
Pair[i].Src = SrcSubscripts[i];
Pair[i].Dst = DstSubscripts[i];
+ unifySubscriptType(&Pair[i]);
// FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
// delinearization has found, and add these constraints to the dependence
@@ -3277,9 +3294,9 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
//
// Care is required to keep the routine below, getSplitIteration(),
// up to date with respect to this routine.
-Dependence *DependenceAnalysis::depends(Instruction *Src,
- Instruction *Dst,
- bool PossiblyLoopIndependent) {
+std::unique_ptr<Dependence>
+DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
+ bool PossiblyLoopIndependent) {
if (Src == Dst)
PossiblyLoopIndependent = false;
@@ -3291,7 +3308,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
DEBUG(dbgs() << "can only handle simple loads and stores\n");
- return new Dependence(Src, Dst);
+ return make_unique<Dependence>(Src, Dst);
}
Value *SrcPtr = getPointerOperand(Src);
@@ -3302,7 +3319,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
case AliasAnalysis::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
DEBUG(dbgs() << "can't analyze may or partial alias\n");
- return new Dependence(Src, Dst);
+ return make_unique<Dependence>(Src, Dst);
case AliasAnalysis::NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
DEBUG(dbgs() << "no alias\n");
@@ -3346,6 +3363,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
++SrcIdx, ++DstIdx, ++P) {
Pair[P].Src = SE->getSCEV(*SrcIdx);
Pair[P].Dst = SE->getSCEV(*DstIdx);
+ unifySubscriptType(&Pair[P]);
}
}
else {
@@ -3675,9 +3693,9 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
return nullptr;
}
- FullDependence *Final = new FullDependence(Result);
+ auto Final = make_unique<FullDependence>(Result);
Result.DV = nullptr;
- return Final;
+ return std::move(Final);
}
@@ -3729,13 +3747,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
//
// breaks the dependence and allows us to vectorize/parallelize
// both loops.
-const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
+const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
unsigned SplitLevel) {
- assert(Dep && "expected a pointer to a Dependence");
- assert(Dep->isSplitable(SplitLevel) &&
+ assert(Dep.isSplitable(SplitLevel) &&
"Dep should be splitable at SplitLevel");
- Instruction *Src = Dep->getSrc();
- Instruction *Dst = Dep->getDst();
+ Instruction *Src = Dep.getSrc();
+ Instruction *Dst = Dep.getDst();
assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
assert(isLoadOrStore(Src));
diff --git a/lib/Analysis/FunctionTargetTransformInfo.cpp b/lib/Analysis/FunctionTargetTransformInfo.cpp
new file mode 100644
index 000000000000..a686bec4e73f
--- /dev/null
+++ b/lib/Analysis/FunctionTargetTransformInfo.cpp
@@ -0,0 +1,50 @@
+//===- llvm/Analysis/FunctionTargetTransformInfo.cpp ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass wraps a TargetTransformInfo in a FunctionPass so that it can
+// forward along the current Function, letting us make target-specific
+// decisions based on the particular subtarget specified for each Function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/FunctionTargetTransformInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "function-tti"
+static const char ftti_name[] = "Function TargetTransformInfo";
+INITIALIZE_PASS_BEGIN(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
+char FunctionTargetTransformInfo::ID = 0;
+
+namespace llvm {
+FunctionPass *createFunctionTargetTransformInfoPass() {
+ return new FunctionTargetTransformInfo();
+}
+}
+
+FunctionTargetTransformInfo::FunctionTargetTransformInfo()
+ : FunctionPass(ID), Fn(nullptr), TTI(nullptr) {
+ initializeFunctionTargetTransformInfoPass(*PassRegistry::getPassRegistry());
+}
+
+void FunctionTargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetTransformInfo>();
+}
+
+void FunctionTargetTransformInfo::releaseMemory() {}
+
+bool FunctionTargetTransformInfo::runOnFunction(Function &F) {
+ Fn = &F;
+ TTI = &getAnalysis<TargetTransformInfo>();
+ return false;
+}
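A hedged sketch of a hypothetical client (MyPass is invented for
illustration): any pass that wants per-function, subtarget-aware answers
declares the dependency and queries the wrapper, mirroring runOnFunction()
above.

  struct MyPass : public FunctionPass {
    static char ID;
    MyPass() : FunctionPass(ID) {}
    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesAll();
      AU.addRequired<FunctionTargetTransformInfo>();
    }
    bool runOnFunction(Function &F) override {
      // Consult FTTI here for costs specific to F's subtarget.
      FunctionTargetTransformInfo &FTTI =
          getAnalysis<FunctionTargetTransformInfo>();
      (void)FTTI;
      return false;
    }
  };
  char MyPass::ID = 0;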
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 67b413577980..6095136d60a1 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_library(LLVMipa
CallGraph.cpp
CallGraphSCCPass.cpp
CallPrinter.cpp
- FindUsedTypes.cpp
GlobalsModRef.cpp
IPA.cpp
InlineCost.cpp
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index dfabb0ad063d..67cf7f86e072 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -282,6 +282,3 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
#endif
-
-// Enuse that users of CallGraph.h also link with this file
-DEFINING_FILE_FOR(CallGraph)
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index c27edbfa2ff5..ded1de79a124 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -21,8 +21,8 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
@@ -243,7 +243,14 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
assert(!CallSites.count(I->first) &&
"Call site occurs in node multiple times");
- CallSites.insert(std::make_pair(I->first, I->second));
+
+ CallSite CS(I->first);
+ if (CS) {
+ Function *Callee = CS.getCalledFunction();
+ // Ignore intrinsics because they're not really function calls.
+ if (!Callee || !(Callee->isIntrinsic()))
+ CallSites.insert(std::make_pair(I->first, I->second));
+ }
++I;
}
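For example, a call site invoking an intrinsic such as @llvm.lifetime.start no
longer lands in the refreshed call-site map; as the comment above says,
intrinsics are not treated as real function calls.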
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
deleted file mode 100644
index b37344b044ce..000000000000
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to seek out all of the types in use by the program. Note
-// that this analysis explicitly does not include types only used by the symbol
-// table.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-char FindUsedTypes::ID = 0;
-INITIALIZE_PASS(FindUsedTypes, "print-used-types",
- "Find Used Types", false, true)
-
-// IncorporateType - Incorporate one type and all of its subtypes into the
-// collection of used types.
-//
-void FindUsedTypes::IncorporateType(Type *Ty) {
- // If ty doesn't already exist in the used types map, add it now, otherwise
- // return.
- if (!UsedTypes.insert(Ty)) return; // Already contain Ty.
-
- // Make sure to add any types this type references now.
- //
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I)
- IncorporateType(*I);
-}
-
-void FindUsedTypes::IncorporateValue(const Value *V) {
- IncorporateType(V->getType());
-
- // If this is a constant, it could be using other types...
- if (const Constant *C = dyn_cast<Constant>(V)) {
- if (!isa<GlobalValue>(C))
- for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
- OI != OE; ++OI)
- IncorporateValue(*OI);
- }
-}
-
-
-// run - This incorporates all types used by the specified module
-//
-bool FindUsedTypes::runOnModule(Module &m) {
- UsedTypes.clear(); // reset if run multiple times...
-
- // Loop over global variables, incorporating their types
- for (Module::const_global_iterator I = m.global_begin(), E = m.global_end();
- I != E; ++I) {
- IncorporateType(I->getType());
- if (I->hasInitializer())
- IncorporateValue(I->getInitializer());
- }
-
- for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) {
- IncorporateType(MI->getType());
- const Function &F = *MI;
-
- // Loop over all of the instructions in the function, adding their return
- // type as well as the types of their operands.
- //
- for (const_inst_iterator II = inst_begin(F), IE = inst_end(F);
- II != IE; ++II) {
- const Instruction &I = *II;
-
- IncorporateType(I.getType()); // Incorporate the type of the instruction
- for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
- OI != OE; ++OI)
- IncorporateValue(*OI); // Insert inst operand types as well
- }
- }
-
- return false;
-}
-
-// Print the types found in the module. If the optional Module parameter is
-// passed in, then the types are printed symbolically if possible, using the
-// symbol table from the module.
-//
-void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
- OS << "Types in use by this module:\n";
- for (SetVector<Type *>::const_iterator I = UsedTypes.begin(),
- E = UsedTypes.end(); I != E; ++I) {
- OS << " " << **I << '\n';
- }
-}
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
index b26c052de67e..806bfb81b6d5 100644
--- a/lib/Analysis/IPA/IPA.cpp
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -22,7 +22,6 @@ void llvm::initializeIPA(PassRegistry &Registry) {
initializeCallGraphWrapperPassPass(Registry);
initializeCallGraphPrinterPass(Registry);
initializeCallGraphViewerPass(Registry);
- initializeFindUsedTypesPass(Registry);
initializeGlobalsModRefPass(Registry);
}
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 8807529cabac..58ac5d33409e 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -17,6 +17,8 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -49,6 +51,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
+ /// The cache of @llvm.assume intrinsics.
+ AssumptionCache &AC;
+
// The called function.
Function &F;
@@ -104,7 +109,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
// Custom analysis routines.
- bool analyzeBlock(BasicBlock *BB);
+ bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
@@ -141,8 +146,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
- Function &Callee, int Threshold)
- : DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
+ AssumptionCache &AC, Function &Callee, int Threshold)
+ : DL(DL), TTI(TTI), AC(AC), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
@@ -778,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold);
+ CallAnalyzer CA(DL, TTI, AC, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
@@ -881,7 +886,8 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
-bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,
@@ -893,6 +899,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
if (isa<DbgInfoIntrinsic>(I))
continue;
+ // Skip ephemeral values.
+ if (EphValues.count(I))
+ continue;
+
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
@@ -967,7 +977,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
break;
}
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- } while (Visited.insert(V));
+ } while (Visited.insert(V).second);
Type *IntPtrTy = DL->getIntPtrType(V->getContext());
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
@@ -1096,6 +1106,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
NumAllocaArgs = SROAArgValues.size();
+ // FIXME: If a caller has multiple calls to a callee, we end up recomputing
+ // the ephemeral values multiple times (and they're completely determined by
+ // the callee, so this is purely duplicate work).
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
+
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
// particular call site in order to get more accurate cost estimates. This
@@ -1129,7 +1145,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
- if (!analyzeBlock(BB)) {
+ if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
HasIndirectBr)
return false;
@@ -1217,6 +1233,7 @@ void CallAnalyzer::dump() {
INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
@@ -1228,12 +1245,14 @@ InlineCostAnalysis::~InlineCostAnalysis() {}
void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
TTI = &getAnalysis<TargetTransformInfo>();
+ ACT = &getAnalysis<AssumptionCacheTracker>();
return false;
}
@@ -1290,7 +1309,8 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(Callee->getDataLayout(), *TTI, *Callee, Threshold);
+ CallAnalyzer CA(Callee->getDataLayout(), *TTI,
+ ACT->getAssumptionCache(*Callee), *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
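The ephemeral-value filtering generalizes beyond this analyzer. A hedged
sketch (the counting loop is hypothetical; only collectEphemeralValues comes
from this patch) of filtering a function the same way:

  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
  unsigned NumRealInsts = 0;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB) {
      if (EphValues.count(&I))
        continue; // ephemeral: exists only to feed @llvm.assume
      ++NumRealInsts;
    }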
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 24655aa002c8..6b5f37002b80 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -84,7 +84,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
/// form.
static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
const LoopInfo *LI,
- SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
Loop *NearestLoop = nullptr;
for (DomTreeNode *Rung = DT->getNode(BB);
Rung; Rung = Rung->getIDom()) {
@@ -112,10 +112,10 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
bool IVUsers::AddUsersImpl(Instruction *I,
- SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
// Add this IV user to the Processed set before returning false to ensure that
// all IV users are members of the set. See IVUsers::isIVUserOrOperand.
- if (!Processed.insert(I))
+ if (!Processed.insert(I).second)
return true; // Instruction already handled.
if (!SE->isSCEVable(I->getType()))
@@ -145,7 +145,7 @@ bool IVUsers::AddUsersImpl(Instruction *I,
SmallPtrSet<Instruction *, 4> UniqueUsers;
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
- if (!UniqueUsers.insert(User))
+ if (!UniqueUsers.insert(User).second)
continue;
// Do not infinitely recurse on PHI nodes.
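The .second changes here track an ADT interface change: SmallPtrSet::insert()
now returns std::pair<iterator, bool> instead of bool. A minimal sketch of the
updated visited-set idiom:

  SmallPtrSet<Instruction *, 4> Visited;
  auto VisitOnce = [&](Instruction *User) {
    // .second is true only when the element was newly inserted.
    return Visited.insert(User).second;
  };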
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 7a820a58f6f0..3fbbd7cbd22a 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
+#include <algorithm>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -41,14 +43,20 @@ enum { RecursionLimit = 3 };
STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumReassoc, "Number of reassociations");
+namespace {
struct Query {
const DataLayout *DL;
const TargetLibraryInfo *TLI;
const DominatorTree *DT;
+ AssumptionCache *AC;
+ const Instruction *CxtI;
Query(const DataLayout *DL, const TargetLibraryInfo *tli,
- const DominatorTree *dt) : DL(DL), TLI(tli), DT(dt) {}
+ const DominatorTree *dt, AssumptionCache *ac = nullptr,
+ const Instruction *cxti = nullptr)
+ : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {}
};
+} // end anonymous namespace
static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
@@ -575,8 +583,9 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
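Every Simplify* entry point in this file grows the same two optional trailing
parameters. A hedged sketch of a call site (trySimplifyAdd is a made-up
helper; DL, TLI, DT and AC are assumed to come from the surrounding pass):

  static void trySimplifyAdd(Instruction &I, const DataLayout *DL,
                             const TargetLibraryInfo *TLI,
                             const DominatorTree *DT, AssumptionCache *AC) {
    // Passing AC and &I lets the simplifier use @llvm.assume facts that
    // hold at I's program point.
    if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
                                   /*isNSW=*/false, /*isNUW=*/false,
                                   DL, TLI, DT, AC, &I))
      I.replaceAllUsesWith(V);
  }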
@@ -624,7 +633,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
}
assert(V->getType()->getScalarType()->isPointerTy() &&
"Unexpected operand type!");
- } while (Visited.insert(V));
+ } while (Visited.insert(V).second);
Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset);
if (V->getType()->isVectorTy())
@@ -676,6 +685,10 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
+ // 0 - X -> 0 if the sub is NUW.
+ if (isNUW && match(Op0, m_Zero()))
+ return Op0;
+
// (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
// For example, (X + Y) - Y -> X; (Y + X) - Y -> X
Value *X = nullptr, *Y = nullptr, *Z = Op1;
@@ -769,8 +782,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -946,29 +960,38 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
}
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
+ const DataLayout *DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
+ const DataLayout *DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
-Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1,
- FastMathFlags FMF,
+Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
@@ -988,6 +1011,10 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (match(Op1, m_Undef()))
return Op1;
+  // X / 0 -> undef; we don't need to preserve faults!
+ if (match(Op1, m_Zero()))
+ return UndefValue::get(Op1->getType());
+
// undef / X -> 0
if (match(Op0, m_Undef()))
return Constant::getNullValue(Op0->getType());
@@ -1028,6 +1055,16 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
(!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
return Constant::getNullValue(Op0->getType());
+ // (X /u C1) /u C2 -> 0 if C1 * C2 overflow
+ ConstantInt *C1, *C2;
+ if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) &&
+ match(Op1, m_ConstantInt(C2))) {
+ bool Overflow;
+ C1->getValue().umul_ov(C2->getValue(), Overflow);
+ if (Overflow)
+ return Constant::getNullValue(Op0->getType());
+ }
+
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
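A worked instance of the overflow fold above: on i32, (X /u 1048576) /u
1048576 has C1 * C2 = 2^40, which overflows 32 bits; since X < 2^32 <= C1 * C2,
the combined quotient is always zero and the expression folds to 0.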
@@ -1055,8 +1092,10 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
@@ -1071,8 +1110,10 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
@@ -1090,8 +1131,10 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
/// SimplifyRem - Given operands for an SRem or URem, see if we can
@@ -1133,6 +1176,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
+ // (X % Y) % Y -> X % Y
+ if ((Opcode == Instruction::SRem &&
+ match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+ (Opcode == Instruction::URem &&
+ match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+ return Op0;
+
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
@@ -1160,8 +1210,10 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
/// SimplifyURemInst - Given operands for a URem, see if we can
@@ -1176,8 +1228,10 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
@@ -1195,8 +1249,10 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
/// isUndefShift - Returns true if a shift by \c Amount always yields undef.
@@ -1264,6 +1320,37 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
return nullptr;
}
+/// \brief Given operands for an LShr or AShr, see if we can
+/// fold the result. If not, this returns null.
+static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
+ bool isExact, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
+
+ // X >> X -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(Op0->getType());
+
+ // undef >> X -> 0
+ // undef >> X -> undef (if it's exact)
+ if (match(Op0, m_Undef()))
+ return isExact ? Op0 : Constant::getNullValue(Op0->getType());
+
+ // The low bit cannot be shifted out of an exact shift if it is set.
+ if (isExact) {
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ APInt Op0KnownZero(BitWidth, 0);
+ APInt Op0KnownOne(BitWidth, 0);
+ computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (Op0KnownOne[0])
+ return Op0;
+ }
+
+ return nullptr;
+}
+
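Worth spelling out the argument the known-bits check encodes: an exact right
shift may only discard zero bits, so if the low bit of Op0 is known to be one,
the only legal shift amount is zero and the shift is the identity, hence
returning Op0.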
/// SimplifyShlInst - Given operands for an Shl, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
@@ -1272,8 +1359,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return V;
// undef << X -> 0
+  // undef << X -> undef (if it's NSW/NUW)
if (match(Op0, m_Undef()))
- return Constant::getNullValue(Op0->getType());
+ return isNSW || isNUW ? Op0 : Constant::getNullValue(Op0->getType());
// (X >> A) << A -> X
Value *X;
@@ -1284,8 +1372,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1293,21 +1382,13 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// fold the result. If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const Query &Q, unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse))
- return V;
-
- // X >> X -> 0
- if (Op0 == Op1)
- return Constant::getNullValue(Op0->getType());
-
- // undef >>l X -> 0
- if (match(Op0, m_Undef()))
- return Constant::getNullValue(Op0->getType());
+ if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
+ MaxRecurse))
+ return V;
// (X << A) >> A -> X
Value *X;
- if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
- cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
+ if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
return X;
return nullptr;
@@ -1316,8 +1397,9 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1325,29 +1407,21 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
/// fold the result. If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const Query &Q, unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse))
+ if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
+ MaxRecurse))
return V;
- // X >> X -> 0
- if (Op0 == Op1)
- return Constant::getNullValue(Op0->getType());
-
// all ones >>a X -> all ones
if (match(Op0, m_AllOnes()))
return Op0;
- // undef >>a X -> all ones
- if (match(Op0, m_Undef()))
- return Constant::getAllOnesValue(Op0->getType());
-
// (X << A) >> A -> X
Value *X;
- if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
- cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+ if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
return X;
// Arithmetic shifting an all-sign-bit value is a no-op.
- unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL);
+ unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (NumSignBits == Op0->getType()->getScalarSizeInBits())
return Op0;
@@ -1357,11 +1431,105 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
+static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
+ ICmpInst *UnsignedICmp, bool IsAnd) {
+ Value *X, *Y;
+
+ ICmpInst::Predicate EqPred;
+ if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(Y), m_Zero())) ||
+ !ICmpInst::isEquality(EqPred))
+ return nullptr;
+
+ ICmpInst::Predicate UnsignedPred;
+ if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) &&
+ ICmpInst::isUnsigned(UnsignedPred))
+ ;
+ else if (match(UnsignedICmp,
+ m_ICmp(UnsignedPred, m_Value(Y), m_Specific(X))) &&
+ ICmpInst::isUnsigned(UnsignedPred))
+ UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
+ else
+ return nullptr;
+
+ // X < Y && Y != 0 --> X < Y
+ // X < Y || Y != 0 --> Y != 0
+ if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE)
+ return IsAnd ? UnsignedICmp : ZeroICmp;
+
+ // X >= Y || Y != 0 --> true
+ // X >= Y || Y == 0 --> X >= Y
+ if (UnsignedPred == ICmpInst::ICMP_UGE && !IsAnd) {
+ if (EqPred == ICmpInst::ICMP_NE)
+ return getTrue(UnsignedICmp->getType());
+ return UnsignedICmp;
+ }
+
+ // X < Y && Y == 0 --> false
+ if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ &&
+ IsAnd)
+ return getFalse(UnsignedICmp->getType());
+
+ return nullptr;
+}
+
+// Simplify (and (icmp ...) (icmp ...)) to false when we can tell that the
+// range of possible values cannot be satisfied.
+static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ ICmpInst::Predicate Pred0, Pred1;
+ ConstantInt *CI1, *CI2;
+ Value *V;
+
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
+ return X;
+
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
+ m_ConstantInt(CI2))))
+ return nullptr;
+
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
+
+ auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ bool isNSW = AddInst->hasNoSignedWrap();
+ bool isNUW = AddInst->hasNoUnsignedWrap();
+
+ const APInt &CI1V = CI1->getValue();
+ const APInt &CI2V = CI2->getValue();
+ const APInt Delta = CI2V - CI1V;
+ if (CI1V.isStrictlyPositive()) {
+ if (Delta == 2) {
+ if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT)
+ return getFalse(ITy);
+ if (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT && isNSW)
+ return getFalse(ITy);
+ }
+ if (Delta == 1) {
+ if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_SGT)
+ return getFalse(ITy);
+ if (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGT && isNSW)
+ return getFalse(ITy);
+ }
+ }
+ if (CI1V.getBoolValue() && isNUW) {
+ if (Delta == 2)
+ if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Delta == 1)
+ if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ }
+
+ return nullptr;
+}
+
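A concrete instance of the Delta == 2 case above: with C1 = 1 and C2 = 3,
(V + 1) u< 3 constrains V to {-1, 0, 1}, while V s> 1 demands V >= 2; no value
satisfies both, so the conjunction folds to false.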
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
@@ -1412,12 +1580,21 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
- if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true))
+ if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
return Op0;
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true))
+ if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
return Op1;
}
+ if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) {
+ if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) {
+ if (Value *V = SimplifyAndOfICmps(ICILHS, ICIRHS))
+ return V;
+ if (Value *V = SimplifyAndOfICmps(ICIRHS, ICILHS))
+ return V;
+ }
+ }
+
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
MaxRecurse))
@@ -1452,8 +1629,62 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
+
+// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
+// contains all possible values.
+static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+ ICmpInst::Predicate Pred0, Pred1;
+ ConstantInt *CI1, *CI2;
+ Value *V;
+
+ if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
+ return X;
+
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
+ m_ConstantInt(CI2))))
+ return nullptr;
+
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
+
+ auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ bool isNSW = AddInst->hasNoSignedWrap();
+ bool isNUW = AddInst->hasNoUnsignedWrap();
+
+ const APInt &CI1V = CI1->getValue();
+ const APInt &CI2V = CI2->getValue();
+ const APInt Delta = CI2V - CI1V;
+ if (CI1V.isStrictlyPositive()) {
+ if (Delta == 2) {
+ if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
+ return getTrue(ITy);
+ if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW)
+ return getTrue(ITy);
+ }
+ if (Delta == 1) {
+ if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE)
+ return getTrue(ITy);
+ if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW)
+ return getTrue(ITy);
+ }
+ }
+ if (CI1V.getBoolValue() && isNUW) {
+ if (Delta == 2)
+ if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ if (Delta == 1)
+ if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
+ return nullptr;
}
/// SimplifyOrInst - Given operands for an Or, see if we can
@@ -1513,6 +1744,15 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
(A == Op0 || B == Op0))
return Constant::getAllOnesValue(Op0->getType());
+ if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) {
+ if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) {
+ if (Value *V = SimplifyOrOfICmps(ICILHS, ICIRHS))
+ return V;
+ if (Value *V = SimplifyOrOfICmps(ICIRHS, ICILHS))
+ return V;
+ }
+ }
+
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
@@ -1545,18 +1785,22 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+
match(A, m_Add(m_Value(V1), m_Value(V2)))) {
// Add commutes, try both ways.
- if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+ if (V1 == B &&
+ MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return A;
- if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+ if (V2 == B &&
+ MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return A;
}
// Or commutes, try both ways.
if ((C1->getValue() & (C1->getValue() + 1)) == 0 &&
match(B, m_Add(m_Value(V1), m_Value(V2)))) {
// Add commutes, try both ways.
- if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+ if (V1 == A &&
+ MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return B;
- if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+ if (V2 == A &&
+ MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return B;
}
}
@@ -1573,8 +1817,10 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
/// SimplifyXorInst - Given operands for a Xor, see if we can
@@ -1628,8 +1874,10 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
static Type *GetCompareTy(Value *Op) {
@@ -1804,6 +2052,50 @@ static Constant *computePointerICmp(const DataLayout *DL,
return ConstantExpr::getICmp(Pred,
ConstantExpr::getAdd(LHSOffset, LHSNoBound),
ConstantExpr::getAdd(RHSOffset, RHSNoBound));
+
+ // If one side of the equality comparison must come from a noalias call
+ // (meaning a system memory allocation function), and the other side must
+ // come from a pointer that cannot overlap with dynamically-allocated
+ // memory within the lifetime of the current function (allocas, byval
+ // arguments, globals), then determine the comparison result here.
+ SmallVector<Value *, 8> LHSUObjs, RHSUObjs;
+ GetUnderlyingObjects(LHS, LHSUObjs, DL);
+ GetUnderlyingObjects(RHS, RHSUObjs, DL);
+
+ // Is the set of underlying objects all noalias calls?
+ auto IsNAC = [](SmallVectorImpl<Value *> &Objects) {
+ return std::all_of(Objects.begin(), Objects.end(),
+ [](Value *V){ return isNoAliasCall(V); });
+ };
+
+ // Is the set of underlying objects all things which must be disjoint from
+  // noalias calls? For allocas, we consider only static ones (dynamic
+ // allocas might be transformed into calls to malloc not simultaneously
+ // live with the compared-to allocation). For globals, we exclude symbols
+  // that might be resolved lazily to symbols in another dynamically-loaded
+ // library (and, thus, could be malloc'ed by the implementation).
+ auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) {
+ return std::all_of(Objects.begin(), Objects.end(),
+ [](Value *V){
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return AI->getParent() && AI->getParent()->getParent() &&
+ AI->isStaticAlloca();
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return (GV->hasLocalLinkage() ||
+ GV->hasHiddenVisibility() ||
+ GV->hasProtectedVisibility() ||
+ GV->hasUnnamedAddr()) &&
+ !GV->isThreadLocal();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+ });
+ };
+
+ if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) ||
+ (IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
+ return ConstantInt::get(GetCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
}
// Otherwise, fail.
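For example, comparing the result of malloc (a noalias call) for equality
against the address of a static alloca or an internal global: the fresh
allocation cannot be that object, so eq folds to false and ne folds to true.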
@@ -1883,40 +2175,46 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, Q.DL))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, Q.DL))
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getTrue(ITy);
if (LHSKnownNonNegative)
return getFalse(ITy);
break;
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getTrue(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL))
+ if (LHSKnownNonNegative &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getFalse(ITy);
break;
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getFalse(ITy);
if (LHSKnownNonNegative)
return getTrue(ITy);
break;
case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (LHSKnownNegative)
return getFalse(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL))
+ if (LHSKnownNonNegative &&
+ isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
}
@@ -1981,6 +2279,22 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Upper = Upper + 1;
assert(Upper != Lower && "Upper part of range has wrapped!");
}
+ } else if (match(LHS, m_NUWShl(m_ConstantInt(CI2), m_Value()))) {
+ // 'shl nuw CI2, x' produces [CI2, CI2 << CLZ(CI2)]
+ Lower = CI2->getValue();
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (match(LHS, m_NSWShl(m_ConstantInt(CI2), m_Value()))) {
+ if (CI2->isNegative()) {
+ // 'shl nsw CI2, x' produces [CI2 << CLO(CI2)-1, CI2]
+ unsigned ShiftAmount = CI2->getValue().countLeadingOnes() - 1;
+ Lower = CI2->getValue().shl(ShiftAmount);
+ Upper = CI2->getValue() + 1;
+ } else {
+ // 'shl nsw CI2, x' produces [CI2, CI2 << CLZ(CI2)-1]
+ unsigned ShiftAmount = CI2->getValue().countLeadingZeros() - 1;
+ Lower = CI2->getValue();
+ Upper = CI2->getValue().shl(ShiftAmount) + 1;
+ }
} else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
// 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
APInt NegOne = APInt::getAllOnesValue(Width);
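Checking the nuw case with numbers: for 'shl nuw i8 3, %x',
countLeadingZeros(3) is 6, so the computed range is [3, 3 << 6] = [3, 192];
shifting by more than 6 would drop a set bit and violate nuw.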
@@ -2189,25 +2503,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
- // If a bit is known to be zero for A and known to be one for B,
- // then A and B cannot be equal.
- if (ICmpInst::isEquality(Pred)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- uint32_t BitWidth = CI->getBitWidth();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
- if (((LHSKnownOne & RHSKnownZero) != 0) ||
- ((LHSKnownZero & RHSKnownOne) != 0))
- return (Pred == ICmpInst::ICMP_EQ)
- ? ConstantInt::getFalse(CI->getContext())
- : ConstantInt::getTrue(CI->getContext());
- }
- }
-
// Special logic for binary operators.
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
@@ -2271,6 +2566,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // icmp pred (or X, Y), X
+ if (LBO && match(LBO, m_CombineOr(m_Or(m_Value(), m_Specific(RHS)),
+ m_Or(m_Specific(RHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+ }
+ // icmp pred X, (or X, Y)
+ if (RBO && match(RBO, m_CombineOr(m_Or(m_Value(), m_Specific(LHS)),
+ m_Or(m_Specific(LHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ }
+
+ // icmp pred (and X, Y), X
+ if (LBO && match(LBO, m_CombineOr(m_And(m_Value(), m_Specific(RHS)),
+ m_And(m_Specific(RHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+ // icmp pred X, (and X, Y)
+ if (RBO && match(RBO, m_CombineOr(m_And(m_Value(), m_Specific(LHS)),
+ m_And(m_Specific(LHS), m_Value())))) {
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ }
+
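These unsigned identities follow from monotonicity: 'or' can only set bits, so
(X | Y) u>= X always holds, and 'and' can only clear bits, so (X & Y) u<= X
always holds; that fixes all four predicates handled above.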
// 0 - (zext X) pred C
if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
@@ -2301,7 +2630,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2311,7 +2641,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2330,7 +2661,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2340,7 +2672,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
// fall-through
@@ -2360,6 +2693,41 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
}
+  // Handle:
+ // CI2 << X == CI
+ // CI2 << X != CI
+ //
+ // where CI2 is a power of 2 and CI isn't
+ if (auto *CI = dyn_cast<ConstantInt>(RHS)) {
+ const APInt *CI2Val, *CIVal = &CI->getValue();
+ if (LBO && match(LBO, m_Shl(m_APInt(CI2Val), m_Value())) &&
+ CI2Val->isPowerOf2()) {
+ if (!CIVal->isPowerOf2()) {
+          // CI2 << X can equal zero in some circumstances, so this
+          // simplification is unsafe if CI is zero.
+          //
+          // We know it is safe if:
+          // - The shift is nsw: we can't shift out the one bit.
+          // - The shift is nuw: we can't shift out the one bit.
+          // - CI2 is one.
+          // - CI isn't zero.
+ if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() ||
+ *CI2Val == 1 || !CI->isZero()) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(RHS->getContext());
+ if (Pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(RHS->getContext());
+ }
+ }
+ if (CIVal->isSignBit() && *CI2Val == 1) {
+ if (Pred == ICmpInst::ICMP_UGT)
+ return ConstantInt::getFalse(RHS->getContext());
+ if (Pred == ICmpInst::ICMP_ULE)
+ return ConstantInt::getTrue(RHS->getContext());
+ }
+ }
+ }
+
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
LBO->getOperand(1) == RBO->getOperand(1)) {
switch (LBO->getOpcode()) {
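A worked instance: (2 << %x) == 12 can never hold, because 2 << %x is either a
power of two or zero while 12 is neither; and since 12 is nonzero, the fold is
safe even without the nsw/nuw flags, so eq folds to false and ne to true.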
@@ -2607,6 +2975,23 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // If a bit is known to be zero for A and known to be one for B,
+ // then A and B cannot be equal.
+ if (ICmpInst::isEquality(Pred)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ uint32_t BitWidth = CI->getBitWidth();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC,
+ Q.CxtI, Q.DT);
+ const APInt &RHSVal = CI->getValue();
+ if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0))
+ return Pred == ICmpInst::ICMP_EQ
+ ? ConstantInt::getFalse(CI->getContext())
+ : ConstantInt::getTrue(CI->getContext());
+ }
+ }
+
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
@@ -2625,8 +3010,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionCache *AC,
+ Instruction *CxtI) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -2722,8 +3108,9 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -2755,15 +3142,71 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
return TrueVal;
+ const auto *ICI = dyn_cast<ICmpInst>(CondVal);
+ unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
+ if (ICI && BitWidth) {
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ APInt MinSignedValue = APInt::getSignBit(BitWidth);
+ Value *X;
+ const APInt *Y;
+ bool TrueWhenUnset;
+ bool IsBitTest = false;
+ if (ICmpInst::isEquality(Pred) &&
+ match(ICI->getOperand(0), m_And(m_Value(X), m_APInt(Y))) &&
+ match(ICI->getOperand(1), m_Zero())) {
+ IsBitTest = true;
+ TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
+ } else if (Pred == ICmpInst::ICMP_SLT &&
+ match(ICI->getOperand(1), m_Zero())) {
+ X = ICI->getOperand(0);
+ Y = &MinSignedValue;
+ IsBitTest = true;
+ TrueWhenUnset = false;
+ } else if (Pred == ICmpInst::ICMP_SGT &&
+ match(ICI->getOperand(1), m_AllOnes())) {
+ X = ICI->getOperand(0);
+ Y = &MinSignedValue;
+ IsBitTest = true;
+ TrueWhenUnset = true;
+ }
+ if (IsBitTest) {
+ const APInt *C;
+ // (X & Y) == 0 ? X & ~Y : X --> X
+ // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
+ if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+ // (X & Y) == 0 ? X : X & ~Y --> X & ~Y
+ // (X & Y) != 0 ? X : X & ~Y --> X
+ if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+
+ if (Y->isPowerOf2()) {
+ // (X & Y) == 0 ? X | Y : X --> X | Y
+ // (X & Y) != 0 ? X | Y : X --> X
+ if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+ // (X & Y) == 0 ? X : X | Y --> X
+ // (X & Y) != 0 ? X : X | Y --> X | Y
+ if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+ }
+ }
+ }
+
return nullptr;
}
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (DL, TLI, DT),
- RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal,
+ Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
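The bit-test folds above are ordinary Boolean identities, so they can be
checked exhaustively outside the compiler; a small self-contained test of
the first pair (the mask value is arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  // Verify (X & Y) == 0 ? (X & ~Y) : X  ==  X        and
  //        (X & Y) != 0 ? (X & ~Y) : X  ==  X & ~Y
  // for every 8-bit X against a fixed mask Y.
  const uint8_t Y = 0x28;
  for (unsigned x = 0; x < 256; ++x) {
    const uint8_t X = static_cast<uint8_t>(x);
    const uint8_t AndNot = X & static_cast<uint8_t>(~Y);
    const uint8_t Sel1 = ((X & Y) == 0) ? AndNot : X;
    const uint8_t Sel2 = ((X & Y) != 0) ? AndNot : X;
    assert(Sel1 == X);      // the true arm fires only when the Y bits are 0
    assert(Sel2 == AndNot); // either arm ends up with the Y bits cleared
  }
}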
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
@@ -2771,29 +3214,72 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
// The type of the GEP pointer operand.
PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType());
+ unsigned AS = PtrTy->getAddressSpace();
// getelementptr P -> P.
if (Ops.size() == 1)
return Ops[0];
- if (isa<UndefValue>(Ops[0])) {
- // Compute the (pointer) type returned by the GEP instruction.
- Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
- Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
- if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
- GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+ // Compute the (pointer) type returned by the GEP instruction.
+ Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
+ Type *GEPTy = PointerType::get(LastType, AS);
+ if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
+ GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+
+ if (isa<UndefValue>(Ops[0]))
return UndefValue::get(GEPTy);
- }
if (Ops.size() == 2) {
// getelementptr P, 0 -> P.
if (match(Ops[1], m_Zero()))
return Ops[0];
- // getelementptr P, N -> P if P points to a type of zero size.
- if (Q.DL) {
- Type *Ty = PtrTy->getElementType();
- if (Ty->isSized() && Q.DL->getTypeAllocSize(Ty) == 0)
+
+ Type *Ty = PtrTy->getElementType();
+ if (Q.DL && Ty->isSized()) {
+ Value *P;
+ uint64_t C;
+ uint64_t TyAllocSize = Q.DL->getTypeAllocSize(Ty);
+ // getelementptr P, N -> P if P points to a type of zero size.
+ if (TyAllocSize == 0)
return Ops[0];
+
+ // The following transforms are only safe if the ptrtoint cast
+ // doesn't truncate the pointers.
+ if (Ops[1]->getType()->getScalarSizeInBits() ==
+ Q.DL->getPointerSizeInBits(AS)) {
+ auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
+ if (match(P, m_Zero()))
+ return Constant::getNullValue(GEPTy);
+ Value *Temp;
+ if (match(P, m_PtrToInt(m_Value(Temp))))
+ if (Temp->getType() == GEPTy)
+ return Temp;
+ return nullptr;
+ };
+
+ // getelementptr V, (sub P, V) -> P if V points to a type of size 1.
+ if (TyAllocSize == 1 &&
+ match(Ops[1], m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0])))))
+ if (Value *R = PtrToIntOrZero(P))
+ return R;
+
+ // getelementptr V, (ashr (sub P, V), C) -> Q
+ // if V points to a type of size 1 << C.
+ if (match(Ops[1],
+ m_AShr(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))),
+ m_ConstantInt(C))) &&
+ TyAllocSize == 1ULL << C)
+ if (Value *R = PtrToIntOrZero(P))
+ return R;
+
+ // getelementptr V, (sdiv (sub P, V), C) -> Q
+ // if V points to a type of size C.
+ if (match(Ops[1],
+ m_SDiv(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))),
+ m_SpecificInt(TyAllocSize))))
+ if (Value *R = PtrToIntOrZero(P))
+ return R;
+ }
}
}
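These ptrtoint folds mirror a familiar pointer round-trip: for a byte
pointer V, V + ((intptr_t)P - (intptr_t)V) lands back on P, and for an
element type of size C the byte difference is divided by C first. A short
sanity check of the size-1 and sdiv cases in plain C++ (assuming the usual
flat address space, which is also what the pointer-width guard above
ensures):

#include <cassert>
#include <cstdint>

int main() {
  // Size-1 case: gep V, (sub (ptrtoint P), (ptrtoint V)) --> P.
  char Buf[16];
  char *V = Buf, *P = Buf + 11;
  char *Q = V + (reinterpret_cast<intptr_t>(P) - reinterpret_cast<intptr_t>(V));
  assert(Q == P);

  // Size-C case (C == sizeof(int)): the difference is pre-divided by C, and
  // pointer arithmetic on int* re-scales it, so the round trip still holds.
  int IBuf[8];
  int *IV = IBuf, *IP = IBuf + 5;
  intptr_t Diff = reinterpret_cast<intptr_t>(IP) - reinterpret_cast<intptr_t>(IV);
  int *IQ = IV + Diff / static_cast<intptr_t>(sizeof(int));
  assert(IQ == IP);
}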
@@ -2807,8 +3293,9 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
@@ -2840,12 +3327,11 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
return nullptr;
}
-Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (DL, TLI, DT),
+Value *llvm::SimplifyInsertValueInst(
+ Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout *DL,
+ const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -2892,8 +3378,10 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyTruncInst(Op, Ty, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
//=== Helper functions for higher up the class hierarchy.
@@ -2965,8 +3453,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT), RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
@@ -2980,8 +3470,9 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -3055,24 +3546,25 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
User::op_iterator ArgEnd, const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT),
+ const TargetLibraryInfo *TLI, const DominatorTree *DT,
+ AssumptionCache *AC, const Instruction *CxtI) {
+ return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
const DataLayout *DL, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return ::SimplifyCall(V, Args.begin(), Args.end(), Query(DL, TLI, DT),
- RecursionLimit);
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyCall(V, Args.begin(), Args.end(),
+ Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
+ const DominatorTree *DT, AssumptionCache *AC) {
Value *Result;
switch (I->getOpcode()) {
@@ -3081,109 +3573,122 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
break;
case Instruction::FAdd:
Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT);
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- DL, TLI, DT);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
+ TLI, DT, AC, I);
break;
case Instruction::FSub:
Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT);
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- DL, TLI, DT);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
+ TLI, DT, AC, I);
break;
case Instruction::FMul:
Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT);
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Mul:
- Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result =
+ SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::FDiv:
- Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::URem:
- Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::FRem:
- Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
+ AC, I);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- DL, TLI, DT);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
+ TLI, DT, AC, I);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(),
- DL, TLI, DT);
+ cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
+ AC, I);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(),
- DL, TLI, DT);
+ cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
+ AC, I);
break;
case Instruction::And:
- Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result =
+ SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::Or:
- Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result =
+ SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::Xor:
- Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result =
+ SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::ICmp:
- Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result =
+ SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), I->getOperand(0),
+ I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::FCmp:
- Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), DL, TLI, DT);
+ Result =
+ SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0),
+ I->getOperand(1), DL, TLI, DT, AC, I);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
- I->getOperand(2), DL, TLI, DT);
+ I->getOperand(2), DL, TLI, DT, AC, I);
break;
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
- Result = SimplifyGEPInst(Ops, DL, TLI, DT);
+ Result = SimplifyGEPInst(Ops, DL, TLI, DT, AC, I);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
IV->getInsertedValueOperand(),
- IV->getIndices(), DL, TLI, DT);
+ IV->getIndices(), DL, TLI, DT, AC, I);
break;
}
case Instruction::PHI:
- Result = SimplifyPHINode(cast<PHINode>(I), Query (DL, TLI, DT));
+ Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I));
break;
case Instruction::Call: {
CallSite CS(cast<CallInst>(I));
- Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
- DL, TLI, DT);
+ Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL,
+ TLI, DT, AC, I);
break;
}
case Instruction::Trunc:
- Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT);
+ Result =
+ SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I);
break;
}
@@ -3207,7 +3712,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
+ const DominatorTree *DT,
+ AssumptionCache *AC) {
bool Simplified = false;
SmallSetVector<Instruction *, 8> Worklist;
@@ -3234,7 +3740,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
I = Worklist[Idx];
// See if this instruction simplifies.
- SimpleV = SimplifyInstruction(I, DL, TLI, DT);
+ SimpleV = SimplifyInstruction(I, DL, TLI, DT, AC);
if (!SimpleV)
continue;
@@ -3257,18 +3763,19 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
return Simplified;
}
-bool llvm::recursivelySimplifyInstruction(Instruction *I,
- const DataLayout *DL,
+bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT);
+ const DominatorTree *DT,
+ AssumptionCache *AC) {
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AC);
}
bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
+ const DominatorTree *DT,
+ AssumptionCache *AC) {
assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
assert(SimpleV && "Must provide a simplified value.");
- return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT);
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AC);
}
diff --git a/lib/Analysis/JumpInstrTableInfo.cpp b/lib/Analysis/JumpInstrTableInfo.cpp
index b5b426533ff3..7aae2a5592e9 100644
--- a/lib/Analysis/JumpInstrTableInfo.cpp
+++ b/lib/Analysis/JumpInstrTableInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -28,7 +29,21 @@ ImmutablePass *llvm::createJumpInstrTableInfoPass() {
return new JumpInstrTableInfo();
}
-JumpInstrTableInfo::JumpInstrTableInfo() : ImmutablePass(ID), Tables() {
+ModulePass *llvm::createJumpInstrTableInfoPass(unsigned Bound) {
+ // This cast is always safe, since the range of unsigned is a subset of the range of uint64_t.
+ uint64_t B = static_cast<uint64_t>(Bound);
+ return new JumpInstrTableInfo(B);
+}
+
+JumpInstrTableInfo::JumpInstrTableInfo(uint64_t ByteAlign)
+ : ImmutablePass(ID), Tables(), ByteAlignment(ByteAlign) {
+ if (!llvm::isPowerOf2_64(ByteAlign)) {
+ // Note that we don't need to handle overflow explicitly here: NextPowerOf2
+ // returns 0 on overflow, and callers reject a 0 alignment when they
+ // actually try to create jumptable entries.
+ ByteAlignment = llvm::NextPowerOf2(ByteAlign);
+ }
+
initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry());
}
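NextPowerOf2 here returns the smallest power of two strictly greater than
its argument (hence the isPowerOf2_64 guard, so an already-valid alignment
is left alone) and wraps to 0 on overflow. A freestanding sketch of that
contract using the classic bit-smearing formulation (the local helper name
is illustrative):

#include <cassert>
#include <cstdint>

// Smallest power of two strictly greater than A; 0 if that overflows.
uint64_t nextPowerOf2(uint64_t A) {
  A |= A >> 1;  A |= A >> 2;  A |= A >> 4;
  A |= A >> 8;  A |= A >> 16; A |= A >> 32;
  return A + 1; // all bits below the leading one are now set
}

int main() {
  assert(nextPowerOf2(0) == 1);     // a 0 alignment is rounded up to 1
  assert(nextPowerOf2(3) == 4);
  assert(nextPowerOf2(4) == 8);     // strictly greater: why the guard exists
  assert(nextPowerOf2(~0ULL) == 0); // overflow yields 0, rejected by callers
}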
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index e0736162a77a..c8d0410c1e0f 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -48,7 +48,7 @@ static void findCallees(
}
for (Value *Op : C->operand_values())
- if (Visited.insert(cast<Constant>(Op)))
+ if (Visited.insert(cast<Constant>(Op)).second)
Worklist.push_back(cast<Constant>(Op));
}
}
@@ -66,7 +66,7 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
for (Instruction &I : BB)
for (Value *Op : I.operand_values())
if (Constant *C = dyn_cast<Constant>(Op))
- if (Visited.insert(C))
+ if (Visited.insert(C).second)
Worklist.push_back(C);
// We've collected all the constant (and thus potentially function or
@@ -113,7 +113,7 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
SmallPtrSet<Constant *, 16> Visited;
for (GlobalVariable &GV : M.globals())
if (GV.hasInitializer())
- if (Visited.insert(GV.getInitializer()))
+ if (Visited.insert(GV.getInitializer()).second)
Worklist.push_back(GV.getInitializer());
DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
@@ -688,7 +688,7 @@ static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N,
SmallPtrSetImpl<LazyCallGraph::Node *> &Printed) {
// Recurse depth first through the nodes.
for (LazyCallGraph::Node &ChildN : N)
- if (Printed.insert(&ChildN))
+ if (Printed.insert(&ChildN).second)
printNodes(OS, ChildN, Printed);
OS << " Call edges in function: " << N.getFunction().getName() << "\n";
@@ -708,21 +708,20 @@ static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &SCC) {
OS << "\n";
}
-PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M,
+PreservedAnalyses LazyCallGraphPrinterPass::run(Module &M,
ModuleAnalysisManager *AM) {
LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M);
- OS << "Printing the call graph for module: " << M->getModuleIdentifier()
+ OS << "Printing the call graph for module: " << M.getModuleIdentifier()
<< "\n\n";
SmallPtrSet<LazyCallGraph::Node *, 16> Printed;
for (LazyCallGraph::Node &N : G)
- if (Printed.insert(&N))
+ if (Printed.insert(&N).second)
printNodes(OS, N, Printed);
for (LazyCallGraph::SCC &SCC : G.postorder_sccs())
printSCC(OS, SCC);
return PreservedAnalyses::all();
-
}
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 9f919f7644a3..4de56f195cb4 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -1,4 +1,4 @@
-//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//===- LazyValueInfo.cpp - Value constraint analysis ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,12 +15,14 @@
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -38,6 +40,7 @@ using namespace PatternMatch;
char LazyValueInfo::ID = 0;
INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
@@ -51,8 +54,7 @@ namespace llvm {
// LVILatticeVal
//===----------------------------------------------------------------------===//
-/// LVILatticeVal - This is the information tracked by LazyValueInfo for each
-/// value.
+/// This is the information tracked by LazyValueInfo for each value.
///
/// FIXME: This is basically just for bringup, this can be made a lot more rich
/// in the future.
@@ -60,19 +62,19 @@ namespace llvm {
namespace {
class LVILatticeVal {
enum LatticeValueTy {
- /// undefined - This Value has no known value yet.
+ /// This Value has no known value yet.
undefined,
- /// constant - This Value has a specific constant value.
+ /// This Value has a specific constant value.
constant,
- /// notconstant - This Value is known to not have the specified value.
+
+ /// This Value is known to not have the specified value.
notconstant,
- /// constantrange - The Value falls within this range.
+ /// The Value falls within this range.
constantrange,
- /// overdefined - This value is not known to be constant, and we know that
- /// it has a value.
+ /// This value is not known to be constant, and we know that it has a value.
overdefined
};
@@ -125,7 +127,7 @@ public:
return Range;
}
- /// markOverdefined - Return true if this is a change in status.
+ /// Return true if this is a change in status.
bool markOverdefined() {
if (isOverdefined())
return false;
@@ -133,7 +135,7 @@ public:
return true;
}
- /// markConstant - Return true if this is a change in status.
+ /// Return true if this is a change in status.
bool markConstant(Constant *V) {
assert(V && "Marking constant with NULL");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
@@ -149,7 +151,7 @@ public:
return true;
}
- /// markNotConstant - Return true if this is a change in status.
+ /// Return true if this is a change in status.
bool markNotConstant(Constant *V) {
assert(V && "Marking constant with NULL");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
@@ -167,7 +169,7 @@ public:
return true;
}
- /// markConstantRange - Return true if this is a change in status.
+ /// Return true if this is a change in status.
bool markConstantRange(const ConstantRange NewR) {
if (isConstantRange()) {
if (NewR.isEmptySet())
@@ -187,7 +189,7 @@ public:
return true;
}
- /// mergeIn - Merge the specified lattice value into this one, updating this
+ /// Merge the specified lattice value into this one, updating this
/// one and returning true if anything changed.
bool mergeIn(const LVILatticeVal &RHS) {
if (RHS.isUndefined() || isOverdefined()) return false;
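mergeIn computes the usual lattice join: undefined is the identity,
overdefined absorbs everything, and two incompatible facts widen to
overdefined. A deliberately reduced three-state model of that contract
(constant ranges and not-constant omitted for brevity):

#include <cassert>

struct Lattice {
  enum Kind { Undefined, Constant, Overdefined };
  Kind K;
  int Val;

  // Returns true if this value changed, matching mergeIn's contract.
  bool mergeIn(const Lattice &RHS) {
    if (RHS.K == Undefined || K == Overdefined)
      return false;                      // nothing new to learn
    if (K == Undefined) { *this = RHS; return true; }
    if (RHS.K == Constant && K == Constant && RHS.Val == Val)
      return false;                      // same fact, no change
    K = Overdefined;                     // conflicting facts widen
    return true;
  }
};

int main() {
  Lattice A = {Lattice::Undefined, 0};
  Lattice C7 = {Lattice::Constant, 7};
  Lattice C9 = {Lattice::Constant, 9};
  assert(A.mergeIn(C7) && A.K == Lattice::Constant && A.Val == 7);
  assert(!A.mergeIn(C7));               // re-merging the same fact is a no-op
  assert(A.mergeIn(C9) && A.K == Lattice::Overdefined);
}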
@@ -295,8 +297,7 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
//===----------------------------------------------------------------------===//
namespace {
- /// LVIValueHandle - A callback value handle updates the cache when
- /// values are erased.
+ /// A callback value handle updates the cache when values are erased.
class LazyValueInfoCache;
struct LVIValueHandle : public CallbackVH {
LazyValueInfoCache *Parent;
@@ -312,59 +313,67 @@ namespace {
}
namespace {
- /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
+ /// This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
class LazyValueInfoCache {
- /// ValueCacheEntryTy - This is all of the cached block information for
- /// exactly one Value*. The entries are sorted by the BasicBlock* of the
+ /// This is all of the cached block information for exactly one Value*.
+ /// The entries are sorted by the BasicBlock* of the
/// entries, allowing us to do a lookup with a binary search.
typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
- /// ValueCache - This is all of the cached information for all values,
+ /// This is all of the cached information for all values,
/// mapped from Value* to key information.
std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
- /// OverDefinedCache - This tracks, on a per-block basis, the set of
- /// values that are over-defined at the end of that block. This is required
+ /// This tracks, on a per-block basis, the set of values that are
+ /// over-defined at the end of that block. This is required
/// for cache updating.
typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
DenseSet<OverDefinedPairTy> OverDefinedCache;
- /// SeenBlocks - Keep track of all blocks that we have ever seen, so we
+ /// Keep track of all blocks that we have ever seen, so we
/// don't spend time removing unused blocks from our caches.
DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
- /// BlockValueStack - This stack holds the state of the value solver
- /// during a query. It basically emulates the callstack of the naive
+ /// This stack holds the state of the value solver during a query.
+ /// It basically emulates the callstack of the naive
/// recursive value lookup process.
std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+
+ /// Keeps track of which block-value pairs are in BlockValueStack.
+ DenseSet<std::pair<BasicBlock*, Value*> > BlockValueSet;
+
+ /// Push BV onto BlockValueStack unless it's already in there.
+ /// Returns true on success.
+ bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) {
+ if (BlockValueSet.count(BV))
+ return false; // It's already in the stack.
+
+ BlockValueStack.push(BV);
+ BlockValueSet.insert(BV);
+ return true;
+ }
+
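Pairing the stack with a membership set is what lets solve() push a
prerequisite and return false without ever enqueueing the same block/value
pair twice, so the retry protocol terminates. The same shape as a
standalone container (a hypothetical sketch, std::set standing in for
DenseSet):

#include <cassert>
#include <set>
#include <stack>
#include <utility>

// A stack that rejects re-pushes of elements already in flight,
// mirroring BlockValueStack + BlockValueSet.
template <typename T> class DedupStack {
  std::stack<T> Stack;
  std::set<T> InFlight;

public:
  // Returns true if the item was actually pushed.
  bool push(const T &V) {
    if (!InFlight.insert(V).second)
      return false; // already pending somewhere in the stack
    Stack.push(V);
    return true;
  }
  const T &top() const { return Stack.top(); }
  void pop() {
    InFlight.erase(Stack.top());
    Stack.pop();
  }
  bool empty() const { return Stack.empty(); }
};

int main() {
  DedupStack<std::pair<int, int> > WL;
  assert(WL.push(std::make_pair(1, 2)));
  assert(!WL.push(std::make_pair(1, 2))); // duplicate work item is rejected
  WL.pop();
  assert(WL.push(std::make_pair(1, 2)));  // may be revisited once popped
}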
+ /// A pointer to the cache of @llvm.assume calls.
+ AssumptionCache *AC;
+ /// An optional DL pointer.
+ const DataLayout *DL;
+ /// An optional DT pointer.
+ DominatorTree *DT;
friend struct LVIValueHandle;
-
- /// OverDefinedCacheUpdater - A helper object that ensures that the
- /// OverDefinedCache is updated whenever solveBlockValue returns.
- struct OverDefinedCacheUpdater {
- LazyValueInfoCache *Parent;
- Value *Val;
- BasicBlock *BB;
- LVILatticeVal &BBLV;
-
- OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV,
- LazyValueInfoCache *P)
- : Parent(P), Val(V), BB(B), BBLV(LV) { }
-
- bool markResult(bool changed) {
- if (changed && BBLV.isOverdefined())
- Parent->OverDefinedCache.insert(std::make_pair(BB, Val));
- return changed;
- }
- };
-
+ void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
+ SeenBlocks.insert(BB);
+ lookup(Val)[BB] = Result;
+ if (Result.isOverdefined())
+ OverDefinedCache.insert(std::make_pair(BB, Val));
+ }
LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
- LVILatticeVal &Result);
+ LVILatticeVal &Result,
+ Instruction *CxtI = nullptr);
bool hasBlockValue(Value *Val, BasicBlock *BB);
// These methods process one work item and may add more. A false value
@@ -377,6 +386,8 @@ namespace {
PHINode *PN, BasicBlock *BB);
bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
Instruction *BBI, BasicBlock *BB);
+ void mergeAssumeBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV,
+ Instruction *BBI);
void solve();
@@ -385,20 +396,26 @@ namespace {
}
public:
- /// getValueInBlock - This is the query interface to determine the lattice
+ /// This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
- LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB,
+ Instruction *CxtI = nullptr);
+
+ /// This is the query interface to determine the lattice
+ /// value for the specified Value* at the specified instruction (generally
+ /// from an assume intrinsic).
+ LVILatticeVal getValueAt(Value *V, Instruction *CxtI);
- /// getValueOnEdge - This is the query interface to determine the lattice
+ /// This is the query interface to determine the lattice
/// value for the specified Value* that is true on the specified edge.
- LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+ LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
+ Instruction *CxtI = nullptr);
- /// threadEdge - This is the update interface to inform the cache that an
- /// edge from PredBB to OldSucc has been threaded to be from PredBB to
- /// NewSucc.
+ /// This is the update interface to inform the cache that an edge from
+ /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
- /// eraseBlock - This is part of the update interface to inform the cache
+ /// This is part of the update interface to inform the cache
/// that a block has been deleted.
void eraseBlock(BasicBlock *BB);
@@ -408,6 +425,10 @@ namespace {
ValueCache.clear();
OverDefinedCache.clear();
}
+
+ LazyValueInfoCache(AssumptionCache *AC, const DataLayout *DL = nullptr,
+ DominatorTree *DT = nullptr)
+ : AC(AC), DL(DL), DT(DT) {}
};
} // end anonymous namespace
@@ -415,17 +436,11 @@ void LVIValueHandle::deleted() {
typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
SmallVector<OverDefinedPairTy, 4> ToErase;
- for (DenseSet<OverDefinedPairTy>::iterator
- I = Parent->OverDefinedCache.begin(),
- E = Parent->OverDefinedCache.end();
- I != E; ++I) {
- if (I->second == getValPtr())
- ToErase.push_back(*I);
- }
-
- for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(),
- E = ToErase.end(); I != E; ++I)
- Parent->OverDefinedCache.erase(*I);
+ for (const OverDefinedPairTy &P : Parent->OverDefinedCache)
+ if (P.second == getValPtr())
+ ToErase.push_back(P);
+ for (const OverDefinedPairTy &P : ToErase)
+ Parent->OverDefinedCache.erase(P);
// This erasure deallocates *this, so it MUST happen after we're done
// using any and all members of *this.
@@ -440,15 +455,11 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
SeenBlocks.erase(I);
SmallVector<OverDefinedPairTy, 4> ToErase;
- for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
- E = OverDefinedCache.end(); I != E; ++I) {
- if (I->first == BB)
- ToErase.push_back(*I);
- }
-
- for (SmallVectorImpl<OverDefinedPairTy>::iterator I = ToErase.begin(),
- E = ToErase.end(); I != E; ++I)
- OverDefinedCache.erase(*I);
+ for (const OverDefinedPairTy& P : OverDefinedCache)
+ if (P.first == BB)
+ ToErase.push_back(P);
+ for (const OverDefinedPairTy &P : ToErase)
+ OverDefinedCache.erase(P);
for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
@@ -458,9 +469,18 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
void LazyValueInfoCache::solve() {
while (!BlockValueStack.empty()) {
std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!");
+
if (solveBlockValue(e.second, e.first)) {
- assert(BlockValueStack.top() == e);
+ // The work item was completely processed.
+ assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
+ assert(lookup(e.second).count(e.first) && "Result should be in cache!");
+
BlockValueStack.pop();
+ BlockValueSet.erase(e);
+ } else {
+ // More work needs to be done before revisiting.
+ assert(BlockValueStack.top() != e && "Stack should have been pushed!");
}
}
}
@@ -490,44 +510,40 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
if (isa<Constant>(Val))
return true;
- ValueCacheEntryTy &Cache = lookup(Val);
- SeenBlocks.insert(BB);
- LVILatticeVal &BBLV = Cache[BB];
-
- // OverDefinedCacheUpdater is a helper object that will update
- // the OverDefinedCache for us when this method exits. Make sure to
- // call markResult on it as we exist, passing a bool to indicate if the
- // cache needs updating, i.e. if we have solve a new value or not.
- OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
-
- // If we've already computed this block's value, return it.
- if (!BBLV.isUndefined()) {
- DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
-
- // Since we're reusing a cached value here, we don't need to update the
- // OverDefinedCahce. The cache will have been properly updated
- // whenever the cached value was inserted.
- ODCacheUpdater.markResult(false);
+ if (lookup(Val).count(BB)) {
+ // If we have a cached value, use that.
+ DEBUG(dbgs() << " reuse BB '" << BB->getName()
+ << "' val=" << lookup(Val)[BB] << '\n');
+
+ // Since we're reusing a cached value, we don't need to update the
+ // OverDefinedCache. The cache will have been properly updated whenever the
+ // cached value was inserted.
return true;
}
- // Otherwise, this is the first time we're seeing this block. Reset the
- // lattice value to overdefined, so that cycles will terminate and be
- // conservatively correct.
- BBLV.markOverdefined();
+ // Hold off inserting this value into the Cache in case we have to return
+ // false and come back later.
+ LVILatticeVal Res;
Instruction *BBI = dyn_cast<Instruction>(Val);
if (!BBI || BBI->getParent() != BB) {
- return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
+ if (!solveBlockValueNonLocal(Res, Val, BB))
+ return false;
+ insertResult(Val, BB, Res);
+ return true;
}
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB));
+ if (!solveBlockValuePHINode(Res, PN, BB))
+ return false;
+ insertResult(Val, BB, Res);
+ return true;
}
if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
- BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
- return ODCacheUpdater.markResult(true);
+ Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+ insertResult(Val, BB, Res);
+ return true;
}
// We can only analyze the definitions of certain classes of instructions
@@ -537,8 +553,9 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
!BBI->getType()->isIntegerTy()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because inst def found.\n");
- BBLV.markOverdefined();
- return ODCacheUpdater.markResult(true);
+ Res.markOverdefined();
+ insertResult(Val, BB, Res);
+ return true;
}
// FIXME: We're currently limited to binops with a constant RHS. This should
@@ -548,11 +565,15 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because inst def found.\n");
- BBLV.markOverdefined();
- return ODCacheUpdater.markResult(true);
+ Res.markOverdefined();
+ insertResult(Val, BB, Res);
+ return true;
}
- return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB));
+ if (!solveBlockValueConstantRange(Res, BBI, BB))
+ return false;
+ insertResult(Val, BB, Res);
+ return true;
}
static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
@@ -597,9 +618,8 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
// If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
// inside InstructionDereferencesPointer either.
if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) {
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
- if (InstructionDereferencesPointer(BI, UnderlyingVal)) {
+ for (Instruction &I : *BB) {
+ if (InstructionDereferencesPointer(&I, UnderlyingVal)) {
NotNull = true;
break;
}
@@ -669,7 +689,10 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
BasicBlock *PhiBB = PN->getIncomingBlock(i);
Value *PhiVal = PN->getIncomingValue(i);
LVILatticeVal EdgeResult;
- EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult);
+ // Note that we can provide PN as the context instruction to getEdgeValue,
+ // even though the results will be cached, because PN is the value being
+ // used as the cache key in the caller.
+ EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN);
if (EdgesMissing)
continue;
@@ -694,16 +717,53 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
return true;
}
+static bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
+ LVILatticeVal &Result,
+ bool isTrueDest = true);
+
+// If we can determine a constant range for the value Val in the context
+// provided by the instruction BBI, then merge it into BBLV.
+void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val,
+ LVILatticeVal &BBLV,
+ Instruction *BBI) {
+ BBI = BBI ? BBI : dyn_cast<Instruction>(Val);
+ if (!BBI)
+ return;
+
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *I = cast<CallInst>(AssumeVH);
+ if (!isValidAssumeForContext(I, BBI, DL, DT))
+ continue;
+
+ Value *C = I->getArgOperand(0);
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(C)) {
+ LVILatticeVal Result;
+ if (getValueFromFromCondition(Val, ICI, Result)) {
+ if (BBLV.isOverdefined())
+ BBLV = Result;
+ else
+ BBLV.mergeIn(Result);
+ }
+ }
+ }
+}
+
bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
Instruction *BBI,
BasicBlock *BB) {
// Figure out the range of the LHS. If that fails, bail.
if (!hasBlockValue(BBI->getOperand(0), BB)) {
- BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0)));
- return false;
+ if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+ return false;
+ BBLV.markOverdefined();
+ return true;
}
LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
+ mergeAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI);
if (!LHSVal.isConstantRange()) {
BBLV.markOverdefined();
return true;
@@ -775,6 +835,47 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
return true;
}
+bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
+ LVILatticeVal &Result, bool isTrueDest) {
+ if (ICI && isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality() && ICI->getOperand(0) == Val) {
+ // We know that V has the RHS constant if this is a true SETEQ or
+ // false SETNE.
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+ Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ else
+ Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ return true;
+ }
+
+ // Recognize the range checking idiom that InstCombine produces.
+ // (X-C1) u< C2 --> [C1, C1+C2)
+ ConstantInt *NegOffset = nullptr;
+ if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
+ match(ICI->getOperand(0), m_Add(m_Specific(Val),
+ m_ConstantInt(NegOffset)));
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
+ if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
+ // Calculate the range of values that would satisfy the comparison.
+ ConstantRange CmpRange(CI->getValue());
+ ConstantRange TrueValues =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+
+ if (NegOffset) // Apply the offset from above.
+ TrueValues = TrueValues.subtract(NegOffset->getValue());
+
+ // If we're interested in the false dest, invert the condition.
+ if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+ Result = LVILatticeVal::getRange(TrueValues);
+ return true;
+ }
+ }
+
+ return false;
+}
+
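The (X - C1) u< C2 idiom works because subtracting C1 rotates the unsigned
number line, turning membership in the possibly wrapping half-open interval
[C1, C1 + C2) into a single unsigned compare. A quick concrete check at
32 bits:

#include <cassert>
#include <cstdint>

// True iff X lies in the half-open, possibly wrapping interval [Lo, Lo+Len).
bool inRange(uint32_t X, uint32_t Lo, uint32_t Len) {
  return static_cast<uint32_t>(X - Lo) < Len; // the InstCombine idiom
}

int main() {
  // Plain interval [10, 15).
  assert(inRange(10, 10, 5) && inRange(14, 10, 5));
  assert(!inRange(15, 10, 5) && !inRange(9, 10, 5));

  // Wrapping interval [0xFFFFFFFE, 2), i.e. {0xFFFFFFFE, 0xFFFFFFFF}.
  assert(inRange(0xFFFFFFFEu, 0xFFFFFFFEu, 2));
  assert(inRange(0xFFFFFFFFu, 0xFFFFFFFEu, 2));
  assert(!inRange(0u, 0xFFFFFFFEu, 2));
}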
/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
/// Val is not constrained on the edge.
static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
@@ -783,7 +884,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// know that v != 0.
if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
// If this is a conditional branch and only one successor goes to BBTo, then
- // we maybe able to infer something from the condition.
+ // we may be able to infer something from the condition.
if (BI->isConditional() &&
BI->getSuccessor(0) != BI->getSuccessor(1)) {
bool isTrueDest = BI->getSuccessor(0) == BBTo;
@@ -800,42 +901,9 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
- ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
- if (ICI && isa<Constant>(ICI->getOperand(1))) {
- if (ICI->isEquality() && ICI->getOperand(0) == Val) {
- // We know that V has the RHS constant if this is a true SETEQ or
- // false SETNE.
- if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
- Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
- else
- Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
- return true;
- }
-
- // Recognize the range checking idiom that InstCombine produces.
- // (X-C1) u< C2 --> [C1, C1+C2)
- ConstantInt *NegOffset = nullptr;
- if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
- match(ICI->getOperand(0), m_Add(m_Specific(Val),
- m_ConstantInt(NegOffset)));
-
- ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
- if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
- // Calculate the range of values that would satisfy the comparison.
- ConstantRange CmpRange(CI->getValue());
- ConstantRange TrueValues =
- ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
-
- if (NegOffset) // Apply the offset from above.
- TrueValues = TrueValues.subtract(NegOffset->getValue());
-
- // If we're interested in the false dest, invert the condition.
- if (!isTrueDest) TrueValues = TrueValues.inverse();
-
- Result = LVILatticeVal::getRange(TrueValues);
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if (getValueFromFromCondition(Val, ICI, Result, isTrueDest))
return true;
- }
- }
}
}
@@ -849,8 +917,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
unsigned BitWidth = Val->getType()->getIntegerBitWidth();
ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/);
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
+ for (SwitchInst::CaseIt i : SI->cases()) {
ConstantRange EdgeVal(i.getCaseValue()->getValue());
if (DefaultCase) {
// It is possible that the default destination is the destination of
@@ -866,10 +933,11 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
return false;
}
-/// \brief Compute the value of Val on the edge BBFrom -> BBTo, or the value at
-/// the basic block if the edge does not constraint Val.
+/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
+/// the basic block if the edge does not constrain Val.
bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
- BasicBlock *BBTo, LVILatticeVal &Result) {
+ BasicBlock *BBTo, LVILatticeVal &Result,
+ Instruction *CxtI) {
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val)) {
Result = LVILatticeVal::get(VC);
@@ -878,19 +946,25 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
if (getEdgeValueLocal(Val, BBFrom, BBTo, Result)) {
if (!Result.isConstantRange() ||
- Result.getConstantRange().getSingleElement())
+ Result.getConstantRange().getSingleElement())
return true;
// FIXME: this check should be moved to the beginning of the function when
// LVI better supports recursive values. Even for the single value case, we
// can intersect to detect dead code (an empty range).
if (!hasBlockValue(Val, BBFrom)) {
- BlockValueStack.push(std::make_pair(BBFrom, Val));
- return false;
+ if (pushBlockValue(std::make_pair(BBFrom, Val)))
+ return false;
+ Result.markOverdefined();
+ return true;
}
// Try to intersect ranges of the BB and the constraint on the edge.
LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+ mergeAssumeBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator());
+ // See note on the use of the CxtI with mergeAssumeBlockValueConstantRange,
+ // and caching, below.
+ mergeAssumeBlockValueConstantRange(Val, InBlock, CxtI);
if (!InBlock.isConstantRange())
return true;
@@ -901,36 +975,64 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
}
if (!hasBlockValue(Val, BBFrom)) {
- BlockValueStack.push(std::make_pair(BBFrom, Val));
- return false;
+ if (pushBlockValue(std::make_pair(BBFrom, Val)))
+ return false;
+ Result.markOverdefined();
+ return true;
}
- // if we couldn't compute the value on the edge, use the value from the BB
+ // If we couldn't compute the value on the edge, use the value from the BB.
Result = getBlockValue(Val, BBFrom);
+ mergeAssumeBlockValueConstantRange(Val, Result, BBFrom->getTerminator());
+ // We can use the context instruction (generically the ultimate instruction
+ // the calling pass is trying to simplify) here, even though the result of
+ // this function is generally cached when called from the solve* functions
+ // (and that cached result might be used with queries using a different
+ // context instruction), because when this function is called from the solve*
+ // functions, the context instruction is not provided. When called from
+ // LazyValueInfoCache::getValueOnEdge, the context instruction is provided,
+ // but then the result is not cached.
+ mergeAssumeBlockValueConstantRange(Val, Result, CxtI);
return true;
}
-LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,
+ Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
- BlockValueStack.push(std::make_pair(BB, V));
+ assert(BlockValueStack.empty() && BlockValueSet.empty());
+ pushBlockValue(std::make_pair(BB, V));
+
solve();
LVILatticeVal Result = getBlockValue(V, BB);
+ mergeAssumeBlockValueConstantRange(V, Result, CxtI);
+
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
+ DEBUG(dbgs() << "LVI Getting value " << *V << " at '"
+ << CxtI->getName() << "'\n");
+
+ LVILatticeVal Result;
+ mergeAssumeBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
LVILatticeVal LazyValueInfoCache::
-getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
+ Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
<< FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
LVILatticeVal Result;
- if (!getEdgeValue(V, FromBB, ToBB, Result)) {
+ if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) {
solve();
- bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result);
+ bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result, CxtI);
(void)WasFastQuery;
assert(WasFastQuery && "More work to do after problem solved?");
}
@@ -947,7 +1049,7 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
// we clear their entries from the cache, and allow lazy updating to recompute
// them when needed.
- // The updating process is fairly simple: we need to dropped cached info
+ // The updating process is fairly simple: we need to drop cached info
// for all values that were marked overdefined in OldSucc, and for those same
// values in any successor of OldSucc (except NewSucc) in which they were
// also marked overdefined.
@@ -955,11 +1057,9 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
worklist.push_back(OldSucc);
DenseSet<Value*> ClearSet;
- for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
- E = OverDefinedCache.end(); I != E; ++I) {
- if (I->first == OldSucc)
- ClearSet.insert(I->second);
- }
+ for (OverDefinedPairTy &P : OverDefinedCache)
+ if (P.first == OldSucc)
+ ClearSet.insert(P.second);
// Use a worklist to perform a depth-first search of OldSucc's successors.
// NOTE: We do not need a visited list since any blocks we have already
@@ -973,15 +1073,14 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
if (ToUpdate == NewSucc) continue;
bool changed = false;
- for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
- I != E; ++I) {
+ for (Value *V : ClearSet) {
// If a value was marked overdefined in OldSucc, and is here too...
DenseSet<OverDefinedPairTy>::iterator OI =
- OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+ OverDefinedCache.find(std::make_pair(ToUpdate, V));
if (OI == OverDefinedCache.end()) continue;
// Remove it from the caches.
- ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+ ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)];
ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
assert(CI != Entry.end() && "Couldn't find entry to update?");
@@ -1003,41 +1102,53 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
// LazyValueInfo Impl
//===----------------------------------------------------------------------===//
-/// getCache - This lazily constructs the LazyValueInfoCache.
-static LazyValueInfoCache &getCache(void *&PImpl) {
+/// This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC,
+ const DataLayout *DL = nullptr,
+ DominatorTree *DT = nullptr) {
if (!PImpl)
- PImpl = new LazyValueInfoCache();
+ PImpl = new LazyValueInfoCache(AC, DL, DT);
return *static_cast<LazyValueInfoCache*>(PImpl);
}
bool LazyValueInfo::runOnFunction(Function &F) {
- if (PImpl)
- getCache(PImpl).clear();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+
+ DominatorTreeWrapperPass *DTWP =
+ getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
+
TLI = &getAnalysis<TargetLibraryInfo>();
+ if (PImpl)
+ getCache(PImpl, AC, DL, DT).clear();
+
// Fully lazy.
return false;
}
void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfo>();
}
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
- delete &getCache(PImpl);
+ delete &getCache(PImpl, AC);
PImpl = nullptr;
}
}
-Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
- LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB);
-
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
+ Instruction *CxtI) {
+ LVILatticeVal Result =
+ getCache(PImpl, AC, DL, DT).getValueInBlock(V, BB, CxtI);
+
if (Result.isConstant())
return Result.getConstant();
if (Result.isConstantRange()) {
@@ -1048,12 +1159,14 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
return nullptr;
}
-/// getConstantOnEdge - Determine whether the specified value is known to be a
+/// Determine whether the specified value is known to be a
/// constant on the specified edge. Return null if not.
Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
- BasicBlock *ToBB) {
- LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
-
+ BasicBlock *ToBB,
+ Instruction *CxtI) {
+ LVILatticeVal Result =
+ getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+
if (Result.isConstant())
return Result.getConstant();
if (Result.isConstantRange()) {
@@ -1064,51 +1177,47 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
return nullptr;
}
-/// getPredicateOnEdge - Determine whether the specified value comparison
-/// with a constant is known to be true or false on the specified CFG edge.
-/// Pred is a CmpInst predicate.
-LazyValueInfo::Tristate
-LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
- BasicBlock *FromBB, BasicBlock *ToBB) {
- LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
-
+static LazyValueInfo::Tristate
+getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result,
+ const DataLayout *DL, TargetLibraryInfo *TLI) {
+
// If we know the value is a constant, evaluate the conditional.
Constant *Res = nullptr;
if (Result.isConstant()) {
Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL,
TLI);
if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
- return ResCI->isZero() ? False : True;
- return Unknown;
+ return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
+ return LazyValueInfo::Unknown;
}
if (Result.isConstantRange()) {
ConstantInt *CI = dyn_cast<ConstantInt>(C);
- if (!CI) return Unknown;
+ if (!CI) return LazyValueInfo::Unknown;
ConstantRange CR = Result.getConstantRange();
if (Pred == ICmpInst::ICMP_EQ) {
if (!CR.contains(CI->getValue()))
- return False;
+ return LazyValueInfo::False;
if (CR.isSingleElement() && CR.contains(CI->getValue()))
- return True;
+ return LazyValueInfo::True;
} else if (Pred == ICmpInst::ICMP_NE) {
if (!CR.contains(CI->getValue()))
- return True;
+ return LazyValueInfo::True;
if (CR.isSingleElement() && CR.contains(CI->getValue()))
- return False;
+ return LazyValueInfo::False;
}
// Handle more complex predicates.
ConstantRange TrueValues =
ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
if (TrueValues.contains(CR))
- return True;
+ return LazyValueInfo::True;
if (TrueValues.inverse().contains(CR))
- return False;
- return Unknown;
+ return LazyValueInfo::False;
+ return LazyValueInfo::Unknown;
}
if (Result.isNotConstant()) {
@@ -1120,26 +1229,48 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
Result.getNotConstant(), C, DL,
TLI);
if (Res->isNullValue())
- return False;
+ return LazyValueInfo::False;
} else if (Pred == ICmpInst::ICMP_NE) {
// !C1 != C -> true iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
Result.getNotConstant(), C, DL,
TLI);
if (Res->isNullValue())
- return True;
+ return LazyValueInfo::True;
}
- return Unknown;
+ return LazyValueInfo::Unknown;
}
- return Unknown;
+ return LazyValueInfo::Unknown;
+}
+
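The constant-range branch of getPredicateResult reduces to interval
containment: if the set of values satisfying the predicate contains the
whole computed range the answer is True, if its complement does the answer
is False, and otherwise Unknown. A sketch with closed non-wrapping
intervals standing in for ConstantRange (a simplification; real ranges
can wrap):

#include <cassert>

// Closed interval [Lo, Hi], a deliberately simplified ConstantRange.
struct Interval { long Lo, Hi; };

static bool contains(Interval Outer, Interval Inner) {
  return Outer.Lo <= Inner.Lo && Inner.Hi <= Outer.Hi;
}

enum Tristate { False = 0, True = 1, Unknown = -1 };

// Decide "x < C" knowing only x in R, over the domain [Min, Max].
Tristate ltConst(Interval R, long C, long Min, long Max) {
  Interval TrueValues = {Min, C - 1}; // values satisfying x < C
  Interval FalseValues = {C, Max};    // its complement
  if (contains(TrueValues, R))
    return True;
  if (contains(FalseValues, R))
    return False;
  return Unknown;
}

int main() {
  const long Min = -128, Max = 127;
  assert(ltConst({0, 9}, 10, Min, Max) == True);    // entire range below 10
  assert(ltConst({10, 20}, 10, Min, Max) == False); // entire range at/above 10
  assert(ltConst({5, 15}, 10, Min, Max) == Unknown);
}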
+/// Determine whether the specified value comparison with a constant is known to
+/// be true or false on the specified CFG edge. Pred is a CmpInst predicate.
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB,
+ Instruction *CxtI) {
+ LVILatticeVal Result =
+ getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+
+ return getPredicateResult(Pred, C, Result, DL, TLI);
+}
+
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
+ Instruction *CxtI) {
+ LVILatticeVal Result = getCache(PImpl, AC, DL, DT).getValueAt(V, CxtI);
+
+ return getPredicateResult(Pred, C, Result, DL, TLI);
}
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
- if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
+ if (PImpl)
+ getCache(PImpl, AC, DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
- if (PImpl) getCache(PImpl).eraseBlock(BB);
+ if (PImpl)
+ getCache(PImpl, AC, DL, DT).eraseBlock(BB);
}
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index 7d4e254a111d..23639e71232d 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -18,7 +18,7 @@
#include "llvm/IR/Function.h"
using namespace llvm;
-/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This
+/// This impl pointer in ~LibCallInfo is actually a StringMap. This
/// helper does the cast.
static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
@@ -38,7 +38,7 @@ const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
}
-/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
+/// Return the LibCallFunctionInfo object corresponding to
/// the specified function if we have it. If not, return null.
const LibCallFunctionInfo *
LibCallInfo::getFunctionInfo(const Function *F) const {
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index b14f3292e900..b5c7245030fc 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -37,6 +37,7 @@
#include "llvm/Analysis/Lint.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -96,11 +97,12 @@ namespace {
Value *findValue(Value *V, bool OffsetOk) const;
Value *findValueImpl(Value *V, bool OffsetOk,
- SmallPtrSet<Value *, 4> &Visited) const;
+ SmallPtrSetImpl<Value *> &Visited) const;
public:
Module *Mod;
AliasAnalysis *AA;
+ AssumptionCache *AC;
DominatorTree *DT;
const DataLayout *DL;
TargetLibraryInfo *TLI;
@@ -118,6 +120,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfo>();
AU.addRequired<DominatorTreeWrapperPass>();
}
@@ -151,6 +154,7 @@ namespace {
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
@@ -175,6 +179,7 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
bool Lint::runOnFunction(Function &F) {
Mod = F.getParent();
AA = &getAnalysis<AliasAnalysis>();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
@@ -504,7 +509,8 @@ void Lint::visitShl(BinaryOperator &I) {
"Undefined result: Shift count out of range", &I);
}
-static bool isZero(Value *V, const DataLayout *DL) {
+static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
+ AssumptionCache *AC) {
// Assume undef could be zero.
if (isa<UndefValue>(V))
return true;
@@ -513,7 +519,8 @@ static bool isZero(Value *V, const DataLayout *DL) {
if (!VecTy) {
unsigned BitWidth = V->getType()->getIntegerBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, DL);
+ computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC,
+ dyn_cast<Instruction>(V), DT);
return KnownZero.isAllOnesValue();
}
@@ -543,22 +550,22 @@ static bool isZero(Value *V, const DataLayout *DL) {
}
void Lint::visitSDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AC),
"Undefined behavior: Division by zero", &I);
}
void Lint::visitUDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AC),
"Undefined behavior: Division by zero", &I);
}
void Lint::visitSRem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AC),
"Undefined behavior: Division by zero", &I);
}
void Lint::visitURem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL),
+ Assert1(!isZero(I.getOperand(1), DL, DT, AC),
"Undefined behavior: Division by zero", &I);
}
@@ -622,9 +629,9 @@ Value *Lint::findValue(Value *V, bool OffsetOk) const {
/// findValueImpl - Implementation helper for findValue.
Value *Lint::findValueImpl(Value *V, bool OffsetOk,
- SmallPtrSet<Value *, 4> &Visited) const {
+ SmallPtrSetImpl<Value *> &Visited) const {
// Detect self-referential values.
- if (!Visited.insert(V))
+ if (!Visited.insert(V).second)
return UndefValue::get(V->getType());
// TODO: Look through sext or zext cast, when the result is known to
@@ -638,7 +645,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
BasicBlock *BB = L->getParent();
SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
for (;;) {
- if (!VisitedBlocks.insert(BB)) break;
+ if (!VisitedBlocks.insert(BB).second)
+ break;
if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
BB, BBI, 6, AA))
return findValueImpl(U, OffsetOk, Visited);
@@ -678,7 +686,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT))
+ if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
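The extra parameters threaded into computeKnownBits above let the known-bits analysis consume llvm.assume facts (via the AssumptionCache) together with dominance information. A hedged sketch of the strengthened query, with V, DL, AC, and DT assumed to be in scope as in Lint's isZero:

    // Known-bits query with assumption and dominator information.
    unsigned BitWidth = V->getType()->getIntegerBitWidth();
    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
    computeKnownBits(V, KnownZero, KnownOne, DL, /*Depth=*/0, AC,
                     dyn_cast<Instruction>(V), DT);
    // If every bit is known to be zero, V is provably the constant 0.
    bool ProvablyZero = KnownZero.isAllOnesValue();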
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 005d309894c1..5042eb9777ac 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -22,25 +22,29 @@
#include "llvm/IR/Operator.h"
using namespace llvm;
-/// AreEquivalentAddressValues - Test if A and B will obviously have the same
-/// value. This includes recognizing that %t0 and %t1 will have the same
+/// \brief Test if A and B will obviously have the same value.
+///
+/// This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
+/// \code
/// %t0 = getelementptr \@a, 0, 3
/// store i32 0, i32* %t0
/// %t1 = getelementptr \@a, 0, 3
/// %t2 = load i32* %t1
+/// \endcode
///
static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
// Test if the values are trivially equivalent.
- if (A == B) return true;
+ if (A == B)
+ return true;
// Test if the values come from identical arithmetic instructions.
// Use isIdenticalToWhenDefined instead of isIdenticalTo because
// this function is only used when one address use dominates the
// other, which means that they'll always either have the same
// value or one of them will have an undefined value.
- if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
- isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) || isa<PHINode>(A) ||
+ isa<GetElementPtrInst>(A))
if (const Instruction *BI = dyn_cast<Instruction>(B))
if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
return true;
@@ -49,15 +53,19 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
return false;
}
-/// isSafeToLoadUnconditionally - Return true if we know that executing a load
-/// from this value cannot trap. If it is not obviously safe to load from the
-/// specified pointer, we do a quick local scan of the basic block containing
-/// ScanFrom, to determine if the address is already accessed.
+/// \brief Check if executing a load of this pointer value cannot trap.
+///
+/// If it is not obviously safe to load from the specified pointer, we do
+/// a quick local scan of the basic block containing \c ScanFrom, to determine
+/// if the address is already accessed.
+///
+/// This uses the pointee type to determine how many bytes need to be safe to
+/// load from the pointer.
bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
- unsigned Align, const DataLayout *TD) {
+ unsigned Align, const DataLayout *DL) {
int64_t ByteOffset = 0;
Value *Base = V;
- Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD);
+ Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
if (ByteOffset < 0) // out of bounds
return false;
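Typical use of this predicate is to guard load speculation. A sketch under the assumption that Ptr, InsertPt, and DL come from the calling transform (illustrative names):

    if (isSafeToLoadUnconditionally(Ptr, InsertPt, /*Align=*/0, DL)) {
      // A load of the pointee type cannot trap at InsertPt, so it may be
      // hoisted above a guarding branch.
      LoadInst *Speculated = new LoadInst(Ptr, "spec.load", InsertPt);
      (void)Speculated;
    }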
@@ -69,26 +77,29 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
BaseType = AI->getAllocatedType();
BaseAlign = AI->getAlignment();
} else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
- // Global variables are safe to load from but their size cannot be
- // guaranteed if they are overridden.
+ // Global variables are not necessarily safe to load from if they are
+ // overridden. Their size may change or they may be weak and require a test
+ // to determine if they were in fact provided.
if (!GV->mayBeOverridden()) {
BaseType = GV->getType()->getElementType();
BaseAlign = GV->getAlignment();
}
}
- if (BaseType && BaseType->isSized()) {
- if (TD && BaseAlign == 0)
- BaseAlign = TD->getPrefTypeAlignment(BaseType);
+ PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0;
- if (Align <= BaseAlign) {
- if (!TD)
- return true; // Loading directly from an alloca or global is OK.
+ // If we found a base allocated type from either an alloca or global variable,
+ // try to see if we are definitively within the allocated region. We need to
+ // know the size of the base type and the loaded type to do anything in this
+ // case, so only try this when we have the DataLayout available.
+ if (BaseType && BaseType->isSized() && DL) {
+ if (BaseAlign == 0)
+ BaseAlign = DL->getPrefTypeAlignment(BaseType);
+ if (Align <= BaseAlign) {
// Check if the load is within the bounds of the underlying object.
- PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
- if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+ if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) &&
(Align == 0 || (ByteOffset % Align) == 0))
return true;
}
@@ -101,6 +112,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
// the load entirely).
BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+ // We can at least always strip pointer casts even though we can't use the
+ // base here.
+ V = V->stripPointerCasts();
+
while (BBI != E) {
--BBI;
@@ -110,46 +125,67 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
!isa<DbgInfoIntrinsic>(BBI))
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
- }
+ Value *AccessedPtr;
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI))
+ AccessedPtr = LI->getPointerOperand();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(BBI))
+ AccessedPtr = SI->getPointerOperand();
+ else
+ continue;
+
+ // Handle trivial cases even w/o DataLayout or other work.
+ if (AccessedPtr == V)
+ return true;
+
+ if (!DL)
+ continue;
+
+ auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
+ if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
+ LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType()))
+ return true;
}
return false;
}
-/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
-/// instruction before ScanFrom) checking to see if we have the value at the
+/// \brief Scan the ScanBB block backwards to see if we have the value at the
/// memory address *Ptr locally available within a small number of instructions.
-/// If the value is available, return it.
///
-/// If not, return the iterator for the last validated instruction that the
-/// value would be live through. If we scanned the entire block and didn't find
-/// something that invalidates *Ptr or provides it, ScanFrom would be left at
-/// begin() and this returns null. ScanFrom could also be left
+/// The scan starts from \c ScanFrom. \c MaxInstsToScan specifies the maximum
+/// number of instructions to scan in the block. If it is set to \c 0, the
+/// whole block will be scanned.
///
-/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
-/// it is set to 0, it will scan the whole block. You can also optionally
-/// specify an alias analysis implementation, which makes this more precise.
+/// If the value is available, this function returns it. If not, it returns the
+/// iterator for the last validated instruction that the value would be live
+/// through. If we scanned the entire block and didn't find something that
+/// invalidates \c *Ptr or provides it, \c ScanFrom is left at the last
+/// instruction processed and this returns null.
///
-/// If TBAATag is non-null and a load or store is found, the TBAA tag from the
-/// load or store is recorded there. If there is no TBAA tag or if no access
-/// is found, it is left unmodified.
+/// You can also optionally specify an alias analysis implementation, which
+/// makes this more precise.
+///
+/// If \c AATags is non-null and a load or store is found, the AA tags from the
+/// load or store are recorded there. If there are no AA tags or if no access is
+/// found, it is left unmodified.
Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA,
- MDNode **TBAATag) {
- if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
-
- // If we're using alias analysis to disambiguate get the size of *Ptr.
- uint64_t AccessSize = 0;
- if (AA) {
- Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- AccessSize = AA->getTypeStoreSize(AccessTy);
- }
-
+ AliasAnalysis *AA, AAMDNodes *AATags) {
+ if (MaxInstsToScan == 0)
+ MaxInstsToScan = ~0U;
+
+ Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+
+ // Try to get the DataLayout for this module. This may be null, in which case
+ // the optimizations will be limited.
+ const DataLayout *DL = ScanBB->getDataLayout();
+
+ // Try to get the store size for the type.
+ uint64_t AccessSize = DL ? DL->getTypeStoreSize(AccessTy)
+ : AA ? AA->getTypeStoreSize(AccessTy) : 0;
+
+ Value *StrippedPtr = Ptr->stripPointerCasts();
+
while (ScanFrom != ScanBB->begin()) {
// We must ignore debug info directives when counting (otherwise they
// would affect codegen).
@@ -159,62 +195,72 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// Restore ScanFrom to expected value in case next test succeeds
ScanFrom++;
-
+
// Don't scan huge blocks.
- if (MaxInstsToScan-- == 0) return nullptr;
-
+ if (MaxInstsToScan-- == 0)
+ return nullptr;
+
--ScanFrom;
// If this is a load of Ptr, the loaded value is available.
// (This is true even if the load is volatile or atomic, although
// those cases are unlikely.)
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
- if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
+ if (AreEquivalentAddressValues(
+ LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
+ CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) {
+ if (AATags)
+ LI->getAAMetadata(*AATags);
return LI;
}
-
+
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
// If this is a store through Ptr, the value is available!
// (This is true even if the store is volatile or atomic, although
// those cases are unlikely.)
- if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
- if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa);
+ if (AreEquivalentAddressValues(StorePtr, StrippedPtr) &&
+ CastInst::isBitOrNoopPointerCastable(SI->getValueOperand()->getType(),
+ AccessTy, DL)) {
+ if (AATags)
+ SI->getAAMetadata(*AATags);
return SI->getOperand(0);
}
-
- // If Ptr is an alloca and this is a store to a different alloca, ignore
- // the store. This is a trivial form of alias analysis that is important
- // for reg2mem'd code.
- if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
- (isa<AllocaInst>(SI->getOperand(1)) ||
- isa<GlobalVariable>(SI->getOperand(1))))
+
+ // If both StrippedPtr and StorePtr reach all the way to an alloca or
+ // global and they are different, ignore the store. This is a trivial form
+ // of alias analysis that is important for reg2mem'd code.
+ if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) &&
+ (isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)) &&
+ StrippedPtr != StorePtr)
continue;
-
+
// If we have alias analysis and it says the store won't modify the loaded
// value, ignore the store.
if (AA &&
- (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
+ AliasAnalysis::Mod) == 0)
continue;
-
+
// Otherwise the store may or may not alias the pointer; bail out.
++ScanFrom;
return nullptr;
}
-
+
// If this is some other instruction that may clobber Ptr, bail out.
if (Inst->mayWriteToMemory()) {
// If alias analysis claims that it really won't modify the load,
// ignore it.
if (AA &&
- (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
+ AliasAnalysis::Mod) == 0)
continue;
-
+
// May modify the pointer, bail out.
++ScanFrom;
return nullptr;
}
}
-
+
// Got to the start of the block, we didn't find it, but are done for this
// block.
return nullptr;
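A sketch of how a caller such as a load-forwarding transform might drive the rewritten FindAvailableLoadedValue; LI is the load being optimized, AA may be null, and the names are illustrative:

    BasicBlock::iterator ScanFrom = LI; // start scanning just above the load
    AAMDNodes AATags;
    if (Value *Avail = FindAvailableLoadedValue(LI->getPointerOperand(),
                                                LI->getParent(), ScanFrom,
                                                /*MaxInstsToScan=*/6, AA,
                                                &AATags)) {
      // Avail produces the same bits as LI (modulo a no-op pointer cast),
      // so the load can be replaced and AATags merged onto the user.
    }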
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 46c0eaabe1a3..b1f62c437326 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -307,7 +308,8 @@ bool Loop::isAnnotatedParallel() const {
// directly or indirectly through another list metadata (in case of
// nested parallel loops). The loop identifier metadata refers to
// itself so we can check both cases with the same routine.
- MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access");
+ MDNode *loopIdMD =
+ II->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
if (!loopIdMD)
return false;
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 7bd866e73e10..190abc73fbb4 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -76,6 +76,9 @@ void LPPassManager::deleteLoopFromQueue(Loop *L) {
LI->updateUnloop(L);
+ // Notify passes that the loop is being deleted.
+ deleteSimpleAnalysisLoop(L);
+
// If L is current loop then skip rest of the passes and let
// runOnFunction remove L from LQ. Otherwise, remove L from LQ now
// and continue applying other passes on CurrentLoop.
@@ -164,6 +167,14 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
}
}
+/// Invoke deleteAnalysisLoop hook for all passes.
+void LPPassManager::deleteSimpleAnalysisLoop(Loop *L) {
+ for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+ LoopPass *LP = getContainedPass(Index);
+ LP->deleteAnalysisLoop(L);
+ }
+}
+
// Recurse through all subloops and all loops into LQ.
static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
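The new deleteSimpleAnalysisLoop hook gives loop passes a loop-granularity counterpart to deleteSimpleAnalysisValue. A sketch of a hypothetical pass opting in so its per-loop state cannot dangle (the pass is illustrative, not part of this change):

    // Hypothetical loop pass keeping a per-loop cache.
    struct MyLoopAnalysis : public LoopPass {
      static char ID;
      MyLoopAnalysis() : LoopPass(ID) {}
      DenseMap<Loop *, unsigned> PerLoopCache;

      bool runOnLoop(Loop *L, LPPassManager &) override { return false; }

      // Invoked via LPPassManager::deleteSimpleAnalysisLoop when L dies.
      void deleteAnalysisLoop(Loop *L) override { PerLoopCache.erase(L); }
    };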
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 10da3d5d6187..ffc9fe64cc52 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -92,7 +92,6 @@ const char *const MemDepPrinter::DepTypeStr[]
bool MemDepPrinter::runOnFunction(Function &F) {
this->F = &F;
- AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
// All this code uses non-const interfaces because MemDep is not
@@ -119,30 +118,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
}
} else {
SmallVector<NonLocalDepResult, 4> NLDI;
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- if (!LI->isUnordered()) {
- // FIXME: Handle atomic/volatile loads.
- Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown),
- static_cast<BasicBlock *>(nullptr)));
- continue;
- }
- AliasAnalysis::Location Loc = AA.getLocation(LI);
- MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (!SI->isUnordered()) {
- // FIXME: Handle atomic/volatile stores.
- Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown),
- static_cast<BasicBlock *>(nullptr)));
- continue;
- }
- AliasAnalysis::Location Loc = AA.getLocation(SI);
- MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
- } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
- AliasAnalysis::Location Loc = AA.getLocation(VI);
- MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
- } else {
- llvm_unreachable("Unknown memory instruction!");
- }
+ assert((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
+ isa<VAArgInst>(Inst)) && "Unknown memory instruction!");
+ MDA.getNonLocalPointerDependency(Inst, NLDI);
DepSet &InstDeps = Deps[Inst];
for (SmallVectorImpl<NonLocalDepResult>::const_iterator
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 64d339f564d7..08b41fee4459 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -332,7 +332,11 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
TLIFn == LibFunc::ZdlPv || // operator delete(void*)
TLIFn == LibFunc::ZdaPv) // operator delete[](void*)
ExpectedNumParams = 1;
- else if (TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
+ else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint)
+ TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong)
+ TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
+ TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint)
+ TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong)
TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow)
ExpectedNumParams = 2;
else
@@ -412,7 +416,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If we have already seen this instruction, bail out. Cycles can happen in
// unreachable code after constant propagation.
- if (!SeenInsts.insert(I))
+ if (!SeenInsts.insert(I).second)
return unknown();
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
@@ -648,7 +652,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
// Record the pointers that were handled in this run, so that they can be
// cleaned later if something fails. We also use this set to break cycles that
// can occur in dead code.
- if (!SeenVals.insert(V)) {
+ if (!SeenVals.insert(V).second) {
Result = unknown();
} else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
Result = visitGEPOperator(*GEP);
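The new LibFunc entries cover C++14 sized deallocation. For illustration, the recognized signatures and their standard Itanium manglings:

    void operator delete(void *, unsigned int);    // _ZdlPvj
    void operator delete(void *, unsigned long);   // _ZdlPvm
    void operator delete[](void *, unsigned int);  // _ZdaPvj
    void operator delete[](void *, unsigned long); // _ZdaPvm

The j/m suffix encodes the size parameter's type and depends on how the target defines size_t, which is why both spellings are listed.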
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 9eaf10958694..c505aa488170 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
@@ -48,13 +49,17 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
"Number of block queries that were completely cached");
// Limit for the number of instructions to scan in a block.
-static const int BlockScanLimit = 100;
+static const unsigned int BlockScanLimit = 100;
+
+// Limit on the number of memdep results to process.
+static const unsigned int NumResultsLimit = 100;
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
@@ -83,11 +88,13 @@ void MemoryDependenceAnalysis::releaseMemory() {
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequiredTransitive<AliasAnalysis>();
}
-bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
DominatorTreeWrapperPass *DTWP =
@@ -158,29 +165,32 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
return AliasAnalysis::Mod;
}
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ AAMDNodes AAInfo;
+
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
+ II->getAAMetadata(AAInfo);
Loc = AliasAnalysis::Location(II->getArgOperand(1),
cast<ConstantInt>(II->getArgOperand(0))
- ->getZExtValue(),
- II->getMetadata(LLVMContext::MD_tbaa));
+ ->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return AliasAnalysis::Mod;
case Intrinsic::invariant_end:
+ II->getAAMetadata(AAInfo);
Loc = AliasAnalysis::Location(II->getArgOperand(2),
cast<ConstantInt>(II->getArgOperand(1))
- ->getZExtValue(),
- II->getMetadata(LLVMContext::MD_tbaa));
+ ->getZExtValue(), AAInfo);
// These intrinsics don't really modify the memory, but returning Mod
// will allow them to be handled conservatively.
return AliasAnalysis::Mod;
default:
break;
}
+ }
// Otherwise, just do the coarse-grained thing that always works.
if (Inst->mayWriteToMemory())
@@ -367,6 +377,36 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
int64_t MemLocOffset = 0;
unsigned Limit = BlockScanLimit;
bool isInvariantLoad = false;
+
+ // We must be careful with atomic accesses, as they may allow another thread
+ // to touch this location, clobbering it. We are conservative: if the
+ // QueryInst is not a simple (non-atomic) memory access, we automatically
+ // return getClobber.
+ // If it is simple, we know, based on the results of
+ // "Compiler testing via a theory of sound optimisations in the C11/C++11
+ // memory model" (PLDI 2013), that a non-atomic location can only be
+ // clobbered between a paired release and acquire action, with no
+ // access to the location in between.
+ // Here is an example giving the general intuition behind this rule.
+ // In the following code:
+ // store x 0;
+ // release action; [1]
+ // acquire action; [4]
+ // %val = load x;
+ // It is unsafe to replace %val by 0 because another thread may be running:
+ // acquire action; [2]
+ // store x 42;
+ // release action; [3]
+ // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
+ // being 42. A key property of this program, however, is that if either
+ // 1 or 4 were missing, there would be a race between the store of 42
+ // and either the store of 0 or the load (making the whole program racy).
+ // The paper mentioned above shows that the same property is respected
+ // by every program that can detect any optimisation of that kind: either
+ // it is racy (undefined) or there is a release followed by an acquire
+ // between the pair of accesses under consideration.
+ bool HasSeenAcquire = false;
+
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
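For concreteness, the litmus test sketched in the comment above can be rendered with C++11 atomics; a std::atomic<int> flag plays the role of the abstract release/acquire actions (this rendering is illustrative, not part of the change):

    #include <atomic>

    int x;                 // the non-atomic location under consideration
    std::atomic<int> sync; // supplies the release/acquire edges

    void thread1() {
      x = 0;                                            // store x 0
      sync.store(1, std::memory_order_release);         // [1] release
      while (sync.load(std::memory_order_acquire) != 2) // [4] acquire
        ;
      int val = x; // 42 is a legal outcome; folding val to 0 is unsound
      (void)val;
    }

    void thread2() {
      while (sync.load(std::memory_order_acquire) != 1) // [2] acquire
        ;
      x = 42;
      sync.store(2, std::memory_order_release);         // [3] release
    }

The program is race-free precisely because both [1]/[2] and [3]/[4] synchronize; remove either pair and it becomes racy, which is the property the bail-out logic relies on.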
@@ -404,10 +444,37 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Values depend on loads if the pointers are must aliased. This means that
// a load depends on another must aliased load from the same value.
+ // One exception is atomic loads: a value can depend on an atomic load that it
+ // does not alias with when this atomic load indicates that another thread may
+ // be accessing the location.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Atomic loads have complications involved.
+ // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
+ // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
+ // release store will know to return getClobber.
// FIXME: This is overly conservative.
- if (!LI->isUnordered())
+ if (!LI->isUnordered()) {
+ if (!QueryInst)
+ return MemDepResult::getClobber(LI);
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
+ if (!QueryLI->isSimple())
+ return MemDepResult::getClobber(LI);
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
+ if (!QuerySI->isSimple())
+ return MemDepResult::getClobber(LI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(LI);
+ }
+
+ if (isAtLeastAcquire(LI->getOrdering()))
+ HasSeenAcquire = true;
+ }
+
+ // FIXME: this is overly conservative.
+ // While volatile accesses cannot be eliminated, they do not have to clobber
+ // non-aliasing locations, as normal accesses can, for example, be reordered
+ // with volatile accesses.
+ if (LI->isVolatile())
return MemDepResult::getClobber(LI);
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
@@ -466,8 +533,32 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
+ // A Monotonic store is OK if the query inst is itself not atomic.
+ // A Release (or higher) store further requires that no acquire load
+ // has been seen.
// FIXME: This is overly conservative.
- if (!SI->isUnordered())
+ if (!SI->isUnordered()) {
+ if (!QueryInst)
+ return MemDepResult::getClobber(SI);
+ if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
+ if (!QueryLI->isSimple())
+ return MemDepResult::getClobber(SI);
+ } else if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst)) {
+ if (!QuerySI->isSimple())
+ return MemDepResult::getClobber(SI);
+ } else if (QueryInst->mayReadOrWriteMemory()) {
+ return MemDepResult::getClobber(SI);
+ }
+
+ if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
+ return MemDepResult::getClobber(SI);
+ }
+
+ // FIXME: this is overly conservative.
+ // While volatile accesses cannot be eliminated, they do not have to clobber
+ // non-aliasing locations, as normal accesses can, for example, be reordered
+ // with volatile accesses.
+ if (SI->isVolatile())
return MemDepResult::getClobber(SI);
// If alias analysis can tell that this store is guaranteed to not modify
@@ -685,7 +776,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
DirtyBlocks.pop_back();
// Already processed this block?
- if (!Visited.insert(DirtyBB))
+ if (!Visited.insert(DirtyBB).second)
continue;
// Do a binary search to see if we already have an entry for this block in
@@ -768,14 +859,65 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
/// own block.
///
void MemoryDependenceAnalysis::
-getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
- BasicBlock *FromBB,
+getNonLocalPointerDependency(Instruction *QueryInst,
SmallVectorImpl<NonLocalDepResult> &Result) {
+
+ auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) {
+ if (auto *I = dyn_cast<LoadInst>(Inst))
+ return AA->getLocation(I);
+ else if (auto *I = dyn_cast<StoreInst>(Inst))
+ return AA->getLocation(I);
+ else if (auto *I = dyn_cast<VAArgInst>(Inst))
+ return AA->getLocation(I);
+ else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
+ return AA->getLocation(I);
+ else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
+ return AA->getLocation(I);
+ else
+ llvm_unreachable("unsupported memory instruction");
+ };
+
+ const AliasAnalysis::Location Loc = getLocation(AA, QueryInst);
+ bool isLoad = isa<LoadInst>(QueryInst);
+ BasicBlock *FromBB = QueryInst->getParent();
+ assert(FromBB);
+
assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
+
+ // This routine does not expect to deal with volatile instructions.
+ // Doing so would require piping the QueryInst all the way through.
+ // TODO: volatiles can't be elided, but they can be reordered with other
+ // non-volatile accesses.
+ auto isVolatile = [](Instruction *Inst) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->isVolatile();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return SI->isVolatile();
+ }
+ return false;
+ };
+ // We currently give up on any instruction which is ordered, but we do handle
+ // atomic instructions which are unordered.
+ // TODO: Handle ordered instructions
+ auto isOrdered = [](Instruction *Inst) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return !LI->isUnordered();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ return !SI->isUnordered();
+ }
+ return false;
+ };
+ if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
+ Result.push_back(NonLocalDepResult(FromBB,
+ MemDepResult::getUnknown(),
+ const_cast<Value *>(Loc.Ptr)));
+ return;
+ }
- PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL);
+
+ PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
@@ -861,7 +1003,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
return Dep;
}
-/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain
+/// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain
/// number of elements in the array that are already properly ordered. This is
/// optimized for the case when only a few entries are added.
static void
@@ -922,10 +1064,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Set up a temporary NLPI value. If the map doesn't yet have an entry for
// CacheKey, this value will be inserted as the associated value. Otherwise,
// it'll be ignored, and we'll have to check to see if the cached size and
- // tbaa tag are consistent with the current query.
+ // AA tags are consistent with the current query.
NonLocalPointerInfo InitialNLPI;
InitialNLPI.Size = Loc.Size;
- InitialNLPI.TBAATag = Loc.TBAATag;
+ InitialNLPI.AATags = Loc.AATags;
// Get the NLPI for CacheKey, inserting one into the map if it doesn't
// already have one.
@@ -955,21 +1097,21 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
SkipFirstBlock);
}
- // If the query's TBAATag is inconsistent with the cached one,
+ // If the query's AATags are inconsistent with the cached ones,
// conservatively throw out the cached data and restart the query with
// no tag if needed.
- if (CacheInfo->TBAATag != Loc.TBAATag) {
- if (CacheInfo->TBAATag) {
+ if (CacheInfo->AATags != Loc.AATags) {
+ if (CacheInfo->AATags) {
CacheInfo->Pair = BBSkipFirstBlockPair();
- CacheInfo->TBAATag = nullptr;
+ CacheInfo->AATags = AAMDNodes();
for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
if (Instruction *Inst = DI->getResult().getInst())
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
CacheInfo->NonLocalDeps.clear();
}
- if (Loc.TBAATag)
- return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+ if (Loc.AATags)
+ return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(),
isLoad, StartBB, Result, Visited,
SkipFirstBlock);
}
@@ -1045,6 +1187,24 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
while (!Worklist.empty()) {
BasicBlock *BB = Worklist.pop_back_val();
+ // If we do process a large number of blocks, it becomes very expensive
+ // and likely isn't worth worrying about.
+ if (Result.size() > NumResultsLimit) {
+ Worklist.clear();
+ // Sort it now (if needed) so that recursive invocations of
+ // getNonLocalPointerDepFromBB and other routines that could reuse the
+ // cache value will only see properly sorted cache arrays.
+ if (Cache && NumSortedEntries != Cache->size()) {
+ SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+ }
+ // Since we bail out, the "Cache" set won't contain all of the
+ // results for the query. This is ok (we can still use it to accelerate
+ // specific block queries) but we can't do the fastpath "return all
+ // results from the set". Clear out the indicator for this.
+ CacheInfo->Pair = BBSkipFirstBlockPair();
+ return true;
+ }
+
// Skip the first block if we have it.
if (!SkipFirstBlock) {
// Analyze the dependency of *Pointer in FromBB. See if we already have
@@ -1251,7 +1411,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
if (I->getBB() != BB)
continue;
- assert(I->getResult().isNonLocal() &&
+ assert((I->getResult().isNonLocal() || !DT->isReachableFromEntry(BB)) &&
"Should only be here with transparent block");
I->setResult(MemDepResult::getUnknown());
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
@@ -1369,14 +1529,11 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseLocalDeps.end()) {
- SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
// RemInst can't be the terminator if it has local stuff depending on it.
- assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
+ assert(!ReverseDepIt->second.empty() && !isa<TerminatorInst>(RemInst) &&
"Nothing can locally depend on a terminator");
- for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
- E = ReverseDeps.end(); I != E; ++I) {
- Instruction *InstDependingOnRemInst = *I;
+ for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
assert(InstDependingOnRemInst != RemInst &&
"Already removed our local dep info");
@@ -1402,12 +1559,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
if (ReverseDepIt != ReverseNonLocalDeps.end()) {
- SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
- for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
- I != E; ++I) {
- assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
+ for (Instruction *I : ReverseDepIt->second) {
+ assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
- PerInstNLInfo &INLD = NonLocalDeps[*I];
+ PerInstNLInfo &INLD = NonLocalDeps[I];
// The information is now dirty!
INLD.second = true;
@@ -1419,7 +1574,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
DI->setResult(NewDirtyVal);
if (Instruction *NextI = NewDirtyVal.getInst())
- ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
+ ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
}
}
@@ -1438,12 +1593,9 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
ReverseNonLocalPtrDeps.find(RemInst);
if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
- SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second;
SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
- for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(),
- E = Set.end(); I != E; ++I) {
- ValueIsLoadPair P = *I;
+ for (ValueIsLoadPair P : ReversePtrDepIt->second) {
assert(P.getPointer() != RemInst &&
"Already removed NonLocalPointerDeps info for RemInst");
@@ -1484,8 +1636,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
DEBUG(verifyRemoved(RemInst));
}
/// verifyRemoved - Verify that the specified instruction does not occur
-/// in our internal data structures.
+/// in our internal data structures. The checks are plain assertions and
+/// are compiled out in release (NDEBUG) builds.
void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+#ifndef NDEBUG
for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
E = LocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
@@ -1514,18 +1668,16 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
E = ReverseLocalDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
- for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
- EE = I->second.end(); II != EE; ++II)
- assert(*II != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
}
for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
E = ReverseNonLocalDeps.end();
I != E; ++I) {
assert(I->first != D && "Inst occurs in data structures");
- for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
- EE = I->second.end(); II != EE; ++II)
- assert(*II != D && "Inst occurs in data structures");
+ for (Instruction *Inst : I->second)
+ assert(Inst != D && "Inst occurs in data structures");
}
for (ReverseNonLocalPtrDepTy::const_iterator
@@ -1533,11 +1685,10 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
assert(I->first != D && "Inst occurs in rev NLPD map");
- for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
- E = I->second.end(); II != E; ++II)
- assert(*II != ValueIsLoadPair(D, false) &&
- *II != ValueIsLoadPair(D, true) &&
+ for (ValueIsLoadPair P : I->second)
+ assert(P != ValueIsLoadPair(D, false) &&
+ P != ValueIsLoadPair(D, true) &&
"Inst occurs in ReverseNonLocalPtrDeps map");
}
-
+#endif
}
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 139fa38b8a94..c214d3cdf17a 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -57,8 +57,9 @@ namespace {
Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
ModRefResult &Mask) override {
Mask = ModRef;
- return Location(CS.getArgument(ArgIdx), UnknownSize,
- CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa));
+ AAMDNodes AATags;
+ CS->getAAMetadata(AATags);
+ return Location(CS.getArgument(ArgIdx), UnknownSize, AATags);
}
ModRefResult getModRefInfo(ImmutableCallSite CS,
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index bfe86425119b..a534418d199e 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -228,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT)) {
+ if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT, AC)) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
@@ -283,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT)) {
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AC)) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
@@ -369,7 +369,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
SmallVectorImpl<Instruction*> &NewInsts) {
// See if we have a version of this value already available and dominating
// PredBB. If so, there is no need to insert a new instance of it.
- PHITransAddr Tmp(InVal, DL);
+ PHITransAddr Tmp(InVal, DL, AC);
if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
return Tmp.getAddr();
diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp
index 1b0f359e6366..68c7535ea594 100644
--- a/lib/Analysis/PtrUseVisitor.cpp
+++ b/lib/Analysis/PtrUseVisitor.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) {
for (Use &U : I.uses()) {
- if (VisitedUses.insert(&U)) {
+ if (VisitedUses.insert(&U).second) {
UseToVisit NewU = {
UseToVisit::UseAndIsOffsetKnownPair(&U, IsOffsetKnown),
Offset
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 08ebf0d85723..8cd85348fdcc 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -10,10 +10,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index de34b727a5a0..6fa7b2e0e30c 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -15,9 +15,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
#include "llvm/Analysis/RegionIterator.h"
-#include "llvm/Support/Timer.h"
-
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
using namespace llvm;
#define DEBUG_TYPE "regionpassmgr"
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 06dbde58c108..86852d634ff2 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1,4 +1,4 @@
-//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -59,9 +59,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -78,6 +80,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -113,6 +116,7 @@ VerifySCEV("verify-scev",
INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
@@ -671,7 +675,254 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
}
}
+namespace {
+struct FindSCEVSize {
+ int Size;
+ FindSCEVSize() : Size(0) {}
+
+ bool follow(const SCEV *S) {
+ ++Size;
+ // Keep looking at all operands of S.
+ return true;
+ }
+ bool isDone() const {
+ return false;
+ }
+};
+}
+
+// Returns the size of the SCEV S.
+static inline int sizeOfSCEV(const SCEV *S) {
+ FindSCEVSize F;
+ SCEVTraversal<FindSCEVSize> ST(F);
+ ST.visitAll(S);
+ return F.Size;
+}
+
+namespace {
+
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
+public:
+ // Computes the Quotient and Remainder of the division of Numerator by
+ // Denominator.
+ static void divide(ScalarEvolution &SE, const SCEV *Numerator,
+ const SCEV *Denominator, const SCEV **Quotient,
+ const SCEV **Remainder) {
+ assert(Numerator && Denominator && "Uninitialized SCEV");
+
+ SCEVDivision D(SE, Numerator, Denominator);
+
+ // Check for the trivial case here to avoid having to check for it in the
+ // rest of the code.
+ if (Numerator == Denominator) {
+ *Quotient = D.One;
+ *Remainder = D.Zero;
+ return;
+ }
+
+ if (Numerator->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = D.Zero;
+ return;
+ }
+
+ // Split the Denominator when it is a product.
+ if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
+ const SCEV *Q, *R;
+ *Quotient = Numerator;
+ for (const SCEV *Op : T->operands()) {
+ divide(SE, *Quotient, Op, &Q, &R);
+ *Quotient = Q;
+
+ // Bail out when the Numerator is not divisible by one of the terms of
+ // the Denominator.
+ if (!R->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = Numerator;
+ return;
+ }
+ }
+ *Remainder = D.Zero;
+ return;
+ }
+
+ D.visit(Numerator);
+ *Quotient = D.Quotient;
+ *Remainder = D.Remainder;
+ }
+
+ // Except in the trivial case described above, we do not know how to divide
+ // Expr by Denominator; the following visitors therefore have empty bodies.
+ void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
+ void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
+ void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
+ void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
+ void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
+ void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+ void visitUnknown(const SCEVUnknown *Numerator) {}
+ void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
+
+ void visitConstant(const SCEVConstant *Numerator) {
+ if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
+ APInt NumeratorVal = Numerator->getValue()->getValue();
+ APInt DenominatorVal = D->getValue()->getValue();
+ uint32_t NumeratorBW = NumeratorVal.getBitWidth();
+ uint32_t DenominatorBW = DenominatorVal.getBitWidth();
+
+ if (NumeratorBW > DenominatorBW)
+ DenominatorVal = DenominatorVal.sext(NumeratorBW);
+ else if (NumeratorBW < DenominatorBW)
+ NumeratorVal = NumeratorVal.sext(DenominatorBW);
+
+ APInt QuotientVal(NumeratorVal.getBitWidth(), 0);
+ APInt RemainderVal(NumeratorVal.getBitWidth(), 0);
+ APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal);
+ Quotient = SE.getConstant(QuotientVal);
+ Remainder = SE.getConstant(RemainderVal);
+ return;
+ }
+ }
+
+ void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
+ const SCEV *StartQ, *StartR, *StepQ, *StepR;
+ assert(Numerator->isAffine() && "Numerator should be affine");
+ divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
+ divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
+ Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
+ Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
+ }
+
+ void visitAddExpr(const SCEVAddExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs, Rs;
+ Type *Ty = Denominator->getType();
+
+ for (const SCEV *Op : Numerator->operands()) {
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
+
+ // Bail out if types do not match.
+ if (Ty != Q->getType() || Ty != R->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ Qs.push_back(Q);
+ Rs.push_back(R);
+ }
+
+ if (Qs.size() == 1) {
+ Quotient = Qs[0];
+ Remainder = Rs[0];
+ return;
+ }
+
+ Quotient = SE.getAddExpr(Qs);
+ Remainder = SE.getAddExpr(Rs);
+ }
+
+ void visitMulExpr(const SCEVMulExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs;
+ Type *Ty = Denominator->getType();
+
+ bool FoundDenominatorTerm = false;
+ for (const SCEV *Op : Numerator->operands()) {
+ // Bail out if types do not match.
+ if (Ty != Op->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ if (FoundDenominatorTerm) {
+ Qs.push_back(Op);
+ continue;
+ }
+
+ // Check whether Denominator divides one of the product operands.
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
+ if (!R->isZero()) {
+ Qs.push_back(Op);
+ continue;
+ }
+
+ // Bail out if types do not match.
+ if (Ty != Q->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ FoundDenominatorTerm = true;
+ Qs.push_back(Q);
+ }
+
+ if (FoundDenominatorTerm) {
+ Remainder = Zero;
+ if (Qs.size() == 1)
+ Quotient = Qs[0];
+ else
+ Quotient = SE.getMulExpr(Qs);
+ return;
+ }
+
+ if (!isa<SCEVUnknown>(Denominator)) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+ // The Remainder is obtained by replacing Denominator by 0 in Numerator.
+ ValueToValueMap RewriteMap;
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(Zero)->getValue();
+ Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+
+ if (Remainder->isZero()) {
+ // The Quotient is obtained by replacing Denominator by 1 in Numerator.
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(One)->getValue();
+ Quotient =
+ SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+ return;
+ }
+
+ // Quotient is (Numerator - Remainder) divided by Denominator.
+ const SCEV *Q, *R;
+ const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
+ if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
+ // This SCEV does not seem to simplify: fail the division here.
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+ divide(SE, Diff, Denominator, &Q, &R);
+ assert(R == Zero &&
+ "(Numerator - Remainder) should evenly divide Denominator");
+ Quotient = Q;
+ }
+
+private:
+ SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
+ const SCEV *Denominator)
+ : SE(S), Denominator(Denominator) {
+ Zero = SE.getConstant(Denominator->getType(), 0);
+ One = SE.getConstant(Denominator->getType(), 1);
+
+ // By default, we don't know how to divide Expr by Denominator.
+ // Providing the default here simplifies the rest of the code.
+ Quotient = Zero;
+ Remainder = Numerator;
+ }
+
+ ScalarEvolution &SE;
+ const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
+};
+
+}
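A sketch of driving the divider from an analysis that owns a ScalarEvolution &SE; IV is assumed to be the SCEV of some induction variable and Ty its integer type (illustrative names; the class lives in this file's anonymous namespace):

    // Divide (4 * IV) by 4; expect Quotient == IV and Remainder == 0.
    const SCEV *Quotient, *Remainder;
    const SCEV *Four = SE.getConstant(Ty, 4);
    SCEVDivision::divide(SE, SE.getMulExpr(Four, IV), Four,
                         &Quotient, &Remainder);
    // When the division does not simplify, the visitor deliberately
    // answers Quotient = 0 and Remainder = Numerator, so callers can
    // test Remainder->isZero() to detect exact divisibility.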
//===----------------------------------------------------------------------===//
// Simple SCEV method implementations
@@ -1486,6 +1737,36 @@ namespace {
};
}
+// We're trying to construct a SCEV of type `Type' with `Ops' as operands and
+// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of
+// can't-overflow flags for the operation if possible.
+static SCEV::NoWrapFlags
+StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
+ const SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags OldFlags) {
+ using namespace std::placeholders;
+
+ bool CanAnalyze =
+ Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr;
+ (void)CanAnalyze;
+ assert(CanAnalyze && "don't call from other places!");
+
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap =
+ ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask);
+
+ // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
+ auto IsKnownNonNegative =
+ std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1);
+
+ if (SignOrUnsignWrap == SCEV::FlagNSW &&
+ std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative))
+ return ScalarEvolution::setFlags(OldFlags,
+ (SCEV::NoWrapFlags)SignOrUnsignMask);
+
+ return OldFlags;
+}
+
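The inference is sound because non-negative operands make the signed and unsigned readings of the operation coincide. A worked 8-bit instance (illustrative, not from the source):

    // Two non-negative i8 operands: a, b in [0, 127].
    uint8_t a = 100, b = 27;
    unsigned sum = unsigned(a) + unsigned(b); // 127 < 256: no unsigned wrap
    // For a multiply the nsw precondition matters: 100 * 27 = 2700 wraps
    // i8 both signed and unsigned, so nsw would be absent and nothing is
    // inferred; only nsw plus non-negative operands licenses adding nuw.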
/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
@@ -1501,20 +1782,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
"SCEVAddExpr operand types don't match!");
#endif
- // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- // And vice-versa.
- int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
- SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
- if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
- bool All = true;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
- E = Ops.end(); I != E; ++I)
- if (!isKnownNonNegative(*I)) {
- All = false;
- break;
- }
- if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
- }
+ Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, LI);
@@ -1889,6 +2157,25 @@ static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
return r;
}
+/// Determine if any of the operands in this SCEV are a constant or if
+/// any of the add or multiply expressions in this SCEV contain a constant.
+static bool containsConstantSomewhere(const SCEV *StartExpr) {
+ SmallVector<const SCEV *, 4> Ops;
+ Ops.push_back(StartExpr);
+ while (!Ops.empty()) {
+ const SCEV *CurrentExpr = Ops.pop_back_val();
+ if (isa<SCEVConstant>(*CurrentExpr))
+ return true;
+
+ if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
+ const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
+ for (const SCEV *Operand : CurrentNAry->operands())
+ Ops.push_back(Operand);
+ }
+ }
+ return false;
+}
+
/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
@@ -1904,20 +2191,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
"SCEVMulExpr operand types don't match!");
#endif
- // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- // And vice-versa.
- int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
- SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
- if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
- bool All = true;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
- E = Ops.end(); I != E; ++I)
- if (!isKnownNonNegative(*I)) {
- All = false;
- break;
- }
- if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
- }
+ Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);
// Sort by complexity, this groups all similar expression types together.
GroupByComplexity(Ops, LI);
@@ -1928,11 +2202,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// C1*(C2+V) -> C1*C2 + C1*V
if (Ops.size() == 2)
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
- if (Add->getNumOperands() == 2 &&
- isa<SCEVConstant>(Add->getOperand(0)))
- return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
- getMulExpr(LHSC, Add->getOperand(1)));
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1]))
+ // If any of Add's ops are Adds or Muls with a constant,
+ // apply this transformation as well.
+ if (Add->getNumOperands() == 2)
+ if (containsConstantSomewhere(Add))
+ return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)),
+ getMulExpr(LHSC, Add->getOperand(1)));
++Idx;
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
@@ -2061,71 +2337,66 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Okay, if there weren't any loop invariants to be folded, check to see if
// there are multiple AddRec's with the same loop induction variable being
// multiplied together. If so, we can fold them.
+
+ // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+ // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+ // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
+ // ]]],+,...up to x=2n}.
+ // Note that the arguments to choose() are always integers with values
+ // known at compile time, never SCEV objects.
+ //
+ // The implementation avoids pointless extra computations when the two
+ // addrec's are of different length (mathematically, it's equivalent to
+ // an infinite stream of zeros on the right).
+ bool OpsModified = false;
for (unsigned OtherIdx = Idx+1;
- OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
- if (AddRecLoop != cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop())
+ const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+ if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
continue;
- // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
- // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
- // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z
- // ]]],+,...up to x=2n}.
- // Note that the arguments to choose() are always integers with values
- // known at compile time, never SCEV objects.
- //
- // The implementation avoids pointless extra computations when the two
- // addrec's are of different length (mathematically, it's equivalent to
- // an infinite stream of zeros on the right).
- bool OpsModified = false;
- for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
- ++OtherIdx) {
- const SCEVAddRecExpr *OtherAddRec =
- dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
- if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
- continue;
-
- bool Overflow = false;
- Type *Ty = AddRec->getType();
- bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
- SmallVector<const SCEV*, 7> AddRecOps;
- for (int x = 0, xe = AddRec->getNumOperands() +
- OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
- const SCEV *Term = getConstant(Ty, 0);
- for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
- uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
- for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
- ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
- z < ze && !Overflow; ++z) {
- uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
- uint64_t Coeff;
- if (LargerThan64Bits)
- Coeff = umul_ov(Coeff1, Coeff2, Overflow);
- else
- Coeff = Coeff1*Coeff2;
- const SCEV *CoeffTerm = getConstant(Ty, Coeff);
- const SCEV *Term1 = AddRec->getOperand(y-z);
- const SCEV *Term2 = OtherAddRec->getOperand(z);
- Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
- }
+ bool Overflow = false;
+ Type *Ty = AddRec->getType();
+ bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+ SmallVector<const SCEV*, 7> AddRecOps;
+ for (int x = 0, xe = AddRec->getNumOperands() +
+ OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
+ const SCEV *Term = getConstant(Ty, 0);
+ for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+ uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+ for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+ ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+ z < ze && !Overflow; ++z) {
+ uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+ uint64_t Coeff;
+ if (LargerThan64Bits)
+ Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+ else
+ Coeff = Coeff1*Coeff2;
+ const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+ const SCEV *Term1 = AddRec->getOperand(y-z);
+ const SCEV *Term2 = OtherAddRec->getOperand(z);
+ Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2));
}
- AddRecOps.push_back(Term);
- }
- if (!Overflow) {
- const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
- SCEV::FlagAnyWrap);
- if (Ops.size() == 2) return NewAddRec;
- Ops[Idx] = NewAddRec;
- Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
- OpsModified = true;
- AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
- if (!AddRec)
- break;
}
+ AddRecOps.push_back(Term);
+ }
+ if (!Overflow) {
+ const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+ SCEV::FlagAnyWrap);
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = NewAddRec;
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ OpsModified = true;
+ AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
+ if (!AddRec)
+ break;
}
- if (OpsModified)
- return getMulExpr(Ops);
}
+ if (OpsModified)
+ return getMulExpr(Ops);
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
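A worked instance of the folding formula above may help; for two affine recurrences over the same loop (a sketch, using the identity i^2 = i + 2*choose(i, 2)):

    f(i) = A1 + A2*i                      i.e. {A1,+,A2}<L>
    g(i) = B1 + B2*i                      i.e. {B1,+,B2}<L>
    f(i)*g(i) = A1*B1 + (A1*B2 + A2*B1 + A2*B2)*i + 2*A2*B2*choose(i, 2)
              = {A1*B1,+,A1*B2 + A2*B1 + A2*B2,+,2*A2*B2}<L>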
@@ -2386,20 +2657,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// meaningful BE count at this point (and if we don't, we'd be stuck
// with a SCEVCouldNotCompute as the cached BE count).
- // If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- // And vice-versa.
- int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
- SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
- if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
- bool All = true;
- for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
- E = Operands.end(); I != E; ++I)
- if (!isKnownNonNegative(*I)) {
- All = false;
- break;
- }
- if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
- }
+ Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
// Canonicalize nested AddRecs in by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
@@ -3082,7 +3340,8 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
Visited.insert(PN);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I)) continue;
+ if (!Visited.insert(I).second)
+ continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
@@ -3263,7 +3522,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, DL, TLI, DT))
+ if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AC))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3395,7 +3654,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
return Zeros.countTrailingOnes();
}
@@ -3403,6 +3662,33 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
return 0;
}
+/// GetRangeFromMetadata - Helper method to assign a range to V from
+/// metadata present in the IR.
+static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) {
+ ConstantRange TotalRange(
+ cast<IntegerType>(I->getType())->getBitWidth(), false);
+
+ unsigned NumRanges = MD->getNumOperands() / 2;
+ assert(NumRanges >= 1);
+
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+ TotalRange = TotalRange.unionWith(Range);
+ }
+
+ return TotalRange;
+ }
+ }
+
+ return None;
+}
+
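For a sense of how the per-pair union above behaves, here is a minimal sketch (the 8-bit width and the bounds are made-up values, not from the patch):

    ConstantRange Total(8, /*isFullSet=*/false);                       // empty set
    Total = Total.unionWith(ConstantRange(APInt(8, 0), APInt(8, 2)));  // [0, 2)
    Total = Total.unionWith(ConstantRange(APInt(8, 5), APInt(8, 10))); // [5, 10)
    // unionWith yields the smallest single range containing both inputs,
    // so Total ends up as [0, 10): sound, though possibly imprecise.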
/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
///
ConstantRange
@@ -3532,9 +3818,14 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // Check if the IR explicitly contains !range metadata.
+ Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
+ if (MDRange.hasValue())
+ ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+
// For a SCEVUnknown, ask ValueTracking.
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
if (Ones == ~Zeros + 1)
return setUnsignedRange(U, ConservativeResult);
return setUnsignedRange(U,
@@ -3683,10 +3974,15 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // Check if the IR explicitly contains !range metadata.
+ Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
+ if (MDRange.hasValue())
+ ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+
// For a SCEVUnknown, ask ValueTracking.
if (!U->getValue()->getType()->isIntegerTy() && !DL)
return setSignedRange(U, ConservativeResult);
- unsigned NS = ComputeNumSignBits(U->getValue(), DL);
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
if (NS <= 1)
return setSignedRange(U, ConservativeResult);
return setSignedRange(U, ConservativeResult.intersectWith(
@@ -3793,7 +4089,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, 0, AC,
+ nullptr, DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4070,6 +4367,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
+ if (BasicBlock *ExitingBB = L->getExitingBlock())
+ return getSmallConstantTripCount(L, ExitingBB);
+
+ // No trip count information for multiple exits.
+ return 0;
+}
+
/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
/// normal unsigned value. Returns 0 if the trip count is unknown or not
/// constant. Will also return 0 if the maximum trip count is very large (>=
@@ -4080,19 +4385,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
/// before taking the branch. For loops with multiple exits, it may not be the
/// number times that the loop header executes because the loop may exit
/// prematurely via another branch.
-///
-/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of
-/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all
-/// loop exits. getExitCount() may return an exact count for this branch
-/// assuming no-signed-wrap. The number of well-defined iterations may actually
-/// be higher than this trip count if this exit test is skipped and the loop
-/// exits via a different branch. Ideally, getExitCount() would know whether it
-/// depends on a NSW assumption, and we would only fall back to a conservative
-/// trip count in that case.
-unsigned ScalarEvolution::
-getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+ BasicBlock *ExitingBlock) {
+ assert(ExitingBlock && "Must pass a non-null exiting block!");
+ assert(L->isLoopExiting(ExitingBlock) &&
+ "Exiting block must actually branch out of the loop!");
const SCEVConstant *ExitCount =
- dyn_cast<SCEVConstant>(getBackedgeTakenCount(L));
+ dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
if (!ExitCount)
return 0;
@@ -4106,6 +4405,14 @@ getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
return ((unsigned)ExitConst->getZExtValue()) + 1;
}
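The +1 at the end converts a backedge-taken count into a trip count; a minimal sketch of the distinction (body() is illustrative only):

    for (int i = 0; i != 10; ++i)  // backedge taken 9 times ...
      body();                      // ... but the body runs 10 times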
+unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
+ if (BasicBlock *ExitingBB = L->getExitingBlock())
+ return getSmallConstantTripMultiple(L, ExitingBB);
+
+ // No trip multiple information for multiple exits.
+ return 0;
+}
+
/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
/// trip count of this loop as a normal unsigned value, if possible. This
/// means that the actual trip count is always a multiple of the returned
@@ -4118,9 +4425,13 @@ getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) {
///
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
-unsigned ScalarEvolution::
-getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) {
- const SCEV *ExitCount = getBackedgeTakenCount(L);
+unsigned
+ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+ BasicBlock *ExitingBlock) {
+ assert(ExitingBlock && "Must pass a non-null exiting block!");
+ assert(L->isLoopExiting(ExitingBlock) &&
+ "Exiting block must actually branch out of the loop!");
+ const SCEV *ExitCount = getExitCount(L, ExitingBlock);
if (ExitCount == getCouldNotCompute())
return 1;
@@ -4230,7 +4541,8 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I)) continue;
+ if (!Visited.insert(I).second)
+ continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
@@ -4282,7 +4594,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I)) continue;
+ if (!Visited.insert(I).second)
+ continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
@@ -4316,7 +4629,8 @@ void ScalarEvolution::forgetValue(Value *V) {
SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
I = Worklist.pop_back_val();
- if (!Visited.insert(I)) continue;
+ if (!Visited.insert(I).second)
+ continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
@@ -4467,20 +4781,12 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
// non-exiting iterations. Partition the loop exits into two kinds:
// LoopMustExits and LoopMayExits.
//
- // A LoopMustExit meets two requirements:
- //
- // (a) Its ExitLimit.MustExit flag must be set which indicates that the exit
- // test condition cannot be skipped (the tested variable has unit stride or
- // the test is less-than or greater-than, rather than a strict inequality).
- //
- // (b) It must dominate the loop latch, hence must be tested on every loop
- // iteration.
- //
- // If any computable LoopMustExit is found, then MaxBECount is the minimum
- // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is
- // conservatively the maximum EL.Max, where CouldNotCompute is considered
- // greater than any computable EL.Max.
- if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch &&
+  // If the exit dominates the loop latch, it is a LoopMustExit; otherwise it
+ // is a LoopMayExit. If any computable LoopMustExit is found, then
+ // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
+ // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
+ // considered greater than any computable EL.Max.
+ if (EL.Max != getCouldNotCompute() && Latch &&
DT->dominates(ExitBB, Latch)) {
if (!MustExitMaxBECount)
MustExitMaxBECount = EL.Max;
@@ -4567,18 +4873,19 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
return getCouldNotCompute();
}
+ bool IsOnlyExit = (L->getExitingBlock() != nullptr);
TerminatorInst *Term = ExitingBlock->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
assert(BI->isConditional() && "If unconditional, it can't be in loop!");
// Proceed to the next level to examine the exit condition expression.
return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
BI->getSuccessor(1),
- /*IsSubExpr=*/false);
+ /*ControlsExit=*/IsOnlyExit);
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
- /*IsSubExpr=*/false);
+ /*ControlsExit=*/IsOnlyExit);
return getCouldNotCompute();
}
@@ -4587,28 +4894,27 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
///
-/// @param IsSubExpr is true if ExitCond does not directly control the exit
-/// branch. In this case, we cannot assume that the loop only exits when the
-/// condition is true and cannot infer that failing to meet the condition prior
-/// to integer wraparound results in undefined behavior.
+/// @param ControlsExit is true if ExitCond directly controls the exit
+/// branch. In this case, we can assume that the loop exits only if the
+/// condition is true and can infer that failing to meet the condition prior to
+/// integer wraparound results in undefined behavior.
ScalarEvolution::ExitLimit
ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
Value *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
- bool IsSubExpr) {
+ bool ControlsExit) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
bool EitherMayExit = L->contains(TBB);
ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- IsSubExpr || EitherMayExit);
+ ControlsExit && !EitherMayExit);
ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- IsSubExpr || EitherMayExit);
+ ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
- bool MustExit = false;
if (EitherMayExit) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
@@ -4623,7 +4929,6 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
MaxBECount = EL0.Max;
else
MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
- MustExit = EL0.MustExit || EL1.MustExit;
} else {
// Both conditions must be true at the same time for the loop to exit.
// For now, be conservative.
@@ -4632,21 +4937,19 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
MaxBECount = EL0.Max;
if (EL0.Exact == EL1.Exact)
BECount = EL0.Exact;
- MustExit = EL0.MustExit && EL1.MustExit;
}
- return ExitLimit(BECount, MaxBECount, MustExit);
+ return ExitLimit(BECount, MaxBECount);
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
bool EitherMayExit = L->contains(FBB);
ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- IsSubExpr || EitherMayExit);
+ ControlsExit && !EitherMayExit);
ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- IsSubExpr || EitherMayExit);
+ ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
- bool MustExit = false;
if (EitherMayExit) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
@@ -4661,7 +4964,6 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
MaxBECount = EL0.Max;
else
MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
- MustExit = EL0.MustExit || EL1.MustExit;
} else {
// Both conditions must be false at the same time for the loop to exit.
// For now, be conservative.
@@ -4670,17 +4972,16 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
MaxBECount = EL0.Max;
if (EL0.Exact == EL1.Exact)
BECount = EL0.Exact;
- MustExit = EL0.MustExit && EL1.MustExit;
}
- return ExitLimit(BECount, MaxBECount, MustExit);
+ return ExitLimit(BECount, MaxBECount);
}
}
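To see why the recursion now passes ControlsExit && !EitherMayExit, consider a sketch of the And case (assuming unsigned n and m, and that both operand counts are computable):

    unsigned i = 0, j = 0;
    while (i != n && j != m) {  // either test failing exits, so neither operand
      ++i;                      // alone controls the exit; the exact count is
      ++j;                      // the unsigned minimum of the two operand counts
    }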
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
- return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr);
+ return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -4707,7 +5008,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
ICmpInst *ExitCond,
BasicBlock *TBB,
BasicBlock *FBB,
- bool IsSubExpr) {
+ bool ControlsExit) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Cond;
@@ -4759,7 +5060,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -4772,14 +5073,14 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT: { // while (X < Y)
bool IsSigned = Cond == ICmpInst::ICMP_SLT;
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr);
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_UGT: { // while (X > Y)
bool IsSigned = Cond == ICmpInst::ICMP_SGT;
- ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr);
+ ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -4801,7 +5102,7 @@ ScalarEvolution::ExitLimit
ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
SwitchInst *Switch,
BasicBlock *ExitingBlock,
- bool IsSubExpr) {
+ bool ControlsExit) {
assert(!L->contains(ExitingBlock) && "Not an exiting block!");
// Give up if the exit is the default dest of a switch.
@@ -4814,7 +5115,7 @@ ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
// while (X != Y) --> while (X-Y != 0)
- ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
if (EL.hasAnyInfo())
return EL;
@@ -5687,7 +5988,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
/// effectively V != 0. We know and take advantage of the fact that this
/// expression is only used in a comparison-by-zero context.
ScalarEvolution::ExitLimit
-ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -5781,38 +6082,34 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
else
MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
: -CR.getUnsignedMin());
- return ExitLimit(Distance, MaxBECount, /*MustExit=*/true);
- }
-
- // If the recurrence is known not to wraparound, unsigned divide computes the
- // back edge count. (Ideally we would have an "isexact" bit for udiv). We know
- // that the value will either become zero (and thus the loop terminates), that
- // the loop will terminate through some other exit condition first, or that
- // the loop has undefined behavior. This means we can't "miss" the exit
- // value, even with nonunit stride, and exit later via the same branch. Note
- // that we can skip this exit if loop later exits via a different
- // branch. Hence MustExit=false.
- //
- // This is only valid for expressions that directly compute the loop exit. It
- // is invalid for subexpressions in which the loop may exit through this
- // branch even if this subexpression is false. In that case, the trip count
- // computed by this udiv could be smaller than the number of well-defined
- // iterations.
- if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
+ return ExitLimit(Distance, MaxBECount);
+ }
+
+ // As a special case, handle the instance where Step is a positive power of
+ // two. In this case, determining whether Step divides Distance evenly can be
+ // done by counting and comparing the number of trailing zeros of Step and
+ // Distance.
+ if (!CountDown) {
+ const APInt &StepV = StepC->getValue()->getValue();
+    // StepV.isPowerOf2() returns true if StepV is a positive power of two. It
+    // also returns true if StepV is maximally negative (e.g., INT_MIN), but
+    // that case is not handled here because this code is guarded by !CountDown.
+ if (StepV.isPowerOf2() &&
+ GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros())
+ return getUDivExactExpr(Distance, Step);
+ }
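Numerically, the test accepts exactly the cases where the power-of-two step must divide the distance; a small sketch with made-up values:

    APInt Step(32, 8);  // 8 == 2^3, so Step.countTrailingZeros() == 3
    // If GetMinTrailingZeros(Distance) >= 3, Distance is a multiple of 8,
    // so Distance udiv Step is exact; e.g. Distance == 24 gives a count of 3.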
+
+ // If the condition controls loop exit (the loop exits only if the expression
+ // is true) and the addition is no-wrap we can use unsigned divide to
+ // compute the backedge count. In this case, the step may not divide the
+ // distance, but we don't care because if the condition is "missed" the loop
+ // will have undefined behavior due to wrapping.
+ if (ControlsExit && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
const SCEV *Exact =
- getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact, /*MustExit=*/false);
+ getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+ return ExitLimit(Exact, Exact);
}
- // If Step is a power of two that evenly divides Start we know that the loop
- // will always terminate. Start may not be a constant so we just have the
- // number of trailing zeros available. This is safe even in presence of
- // overflow as the recurrence will overflow to exactly 0.
- const APInt &StepV = StepC->getValue()->getValue();
- if (StepV.isPowerOf2() &&
- GetMinTrailingZeros(getNegativeSCEV(Start)) >= StepV.countTrailingZeros())
- return getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
-
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
@@ -6309,19 +6606,33 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return true;
+ if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
+
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return false;
BranchInst *LoopContinuePredicate =
dyn_cast<BranchInst>(Latch->getTerminator());
- if (!LoopContinuePredicate ||
- LoopContinuePredicate->isUnconditional())
- return false;
+ if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
+ isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
+ return true;
+
+ // Check conditions due to any @llvm.assume intrinsics.
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *CI = cast<CallInst>(AssumeVH);
+ if (!DT->dominates(CI, Latch->getTerminator()))
+ continue;
- return isImpliedCond(Pred, LHS, RHS,
- LoopContinuePredicate->getCondition(),
- LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+ if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+ return true;
+ }
+
+ return false;
}
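A source-level sketch of what the new assumption scan can pick up (clang lowers __builtin_assume to @llvm.assume; the function below is illustrative only):

    void f(int *a, int n) {
      __builtin_assume(n > 0);     // dominates the loop's latch terminator
      for (int i = 0; i < n; ++i)  // n > 0 now acts as an extra guarding
        a[i] = 0;                  // condition for implication queries
    }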
/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
@@ -6335,6 +6646,8 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
// (interprocedural conditions notwithstanding).
if (!L) return false;
+ if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
+
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
@@ -6355,6 +6668,18 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
return true;
}
+ // Check conditions due to any @llvm.assume intrinsics.
+ for (auto &AssumeVH : AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ auto *CI = cast<CallInst>(AssumeVH);
+ if (!DT->dominates(CI, L->getHeader()))
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+ return true;
+ }
+
return false;
}
@@ -6469,6 +6794,66 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
RHS, LHS, FoundLHS, FoundRHS);
}
+ // Check if we can make progress by sharpening ranges.
+ if (FoundPred == ICmpInst::ICMP_NE &&
+ (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
+
+ const SCEVConstant *C = nullptr;
+ const SCEV *V = nullptr;
+
+ if (isa<SCEVConstant>(FoundLHS)) {
+ C = cast<SCEVConstant>(FoundLHS);
+ V = FoundRHS;
+ } else {
+ C = cast<SCEVConstant>(FoundRHS);
+ V = FoundLHS;
+ }
+
+ // The guarding predicate tells us that C != V. If the known range
+ // of V is [C, t), we can sharpen the range to [C + 1, t). The
+  // range we consider has to correspond to the same signedness as the
+ // predicate we're interested in folding.
+
+ APInt Min = ICmpInst::isSigned(Pred) ?
+ getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
+
+ if (Min == C->getValue()->getValue()) {
+ // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
+ // This is true even if (Min + 1) wraps around -- in case of
+ // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
+
+ APInt SharperMin = Min + 1;
+
+ switch (Pred) {
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE:
+ // We know V `Pred` SharperMin. If this implies LHS `Pred`
+ // RHS, we're done.
+ if (isImpliedCondOperands(Pred, LHS, RHS, V,
+ getConstant(SharperMin)))
+ return true;
+
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT:
+ // We know from the range information that (V `Pred` Min ||
+ // V == Min). We know from the guarding condition that !(V
+ // == Min). This gives us
+ //
+ // V `Pred` Min || V == Min && !(V == Min)
+ // => V `Pred` Min
+ //
+ // If V `Pred` Min implies LHS `Pred` RHS, we're done.
+
+ if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
+ return true;
+
+ default:
+ // No change
+ break;
+ }
+ }
+ }
+
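A small numeric sketch of the sharpening step, using a made-up 8-bit unsigned range [0, 100) for V:

    APInt Min(8, 0), C(8, 0);     // known minimum of V; the guard proves V != 0
    if (Min == C) {
      APInt SharperMin = Min + 1; // so V >= 1, usable for the UGE/SGE cases;
    }                             // sound even where Min + 1 wraps, as noted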
// Check whether the actual condition is beyond sufficient.
if (FoundPred == ICmpInst::ICMP_EQ)
if (ICmpInst::isTrueWhenEqual(Pred))
@@ -6498,6 +6883,85 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
getNotSCEV(FoundLHS));
}
+
+/// If Expr computes ~A, return A; otherwise return nullptr.
+static const SCEV *MatchNotExpr(const SCEV *Expr) {
+ const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
+ if (!Add || Add->getNumOperands() != 2) return nullptr;
+
+ const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0));
+ if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue()))
+ return nullptr;
+
+ const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
+ if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr;
+
+ const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0));
+ if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue()))
+ return nullptr;
+
+ return AddRHS->getOperand(1);
+}
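The shape being matched follows from the identity ~A == -1 - A == (-1) + (-1)*A: in canonical SCEV form, a not-expression is a two-operand add whose first operand is the all-ones constant and whose second is a multiply by -1, which is exactly what the two checks above test for.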
+
+
+/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
+template<typename MaxExprType>
+static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
+ const SCEV *Candidate) {
+ const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
+ if (!MaxExpr) return false;
+
+ auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate);
+ return It != MaxExpr->op_end();
+}
+
+
+/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
+template<typename MaxExprType>
+static bool IsMinConsistingOf(ScalarEvolution &SE,
+ const SCEV *MaybeMinExpr,
+ const SCEV *Candidate) {
+ const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
+ if (!MaybeMaxExpr)
+ return false;
+
+ return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
+}
+
+
+/// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
+/// expression?
+static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
+ ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ switch (Pred) {
+ default:
+ return false;
+
+ case ICmpInst::ICMP_SGE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_SLE:
+ return
+ // min(A, ...) <= A
+ IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
+
+ case ICmpInst::ICMP_UGE:
+ std::swap(LHS, RHS);
+ // fall through
+ case ICmpInst::ICMP_ULE:
+ return
+ // min(A, ...) <= A
+ IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
+ // A <= max(A, ...)
+ IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
+ }
+
+ llvm_unreachable("covered switch fell through?!");
+}
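These helpers lean on SCEV having no first-class min expression: smin(A, B) is represented as ~smax(~A, ~B), and likewise for umin/umax, so IsMinConsistingOf strips the outer not via MatchNotExpr and then looks for ~Candidate among the max's operands. In the SLE case above, for instance, the call proves min(A, ...) <= A when LHS is such a disguised min containing RHS.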
+
/// isImpliedCondOperandsHelper - Test whether the condition described by
/// Pred, LHS, and RHS is true whenever the condition described by Pred,
/// FoundLHS, and FoundRHS is true.
@@ -6506,6 +6970,12 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
+ auto IsKnownPredicateFull =
+ [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
+ return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
+ IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS);
+ };
+
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
@@ -6515,26 +6985,26 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
break;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
- isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
- isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
- isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
- isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS))
return true;
break;
}
@@ -6614,13 +7084,13 @@ const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
/// specified less-than comparison will execute. If not computable, return
/// CouldNotCompute.
///
-/// @param IsSubExpr is true when the LHS < RHS condition does not directly
-/// control the branch. In this case, we can only compute an iteration count for
-/// a subexpression that cannot overflow before evaluating true.
+/// @param ControlsExit is true when the LHS < RHS condition directly controls
+/// the branch (the loop exits only if the condition is true). In this case, we
+/// can use NoWrapFlags to skip overflow checks.
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
- bool IsSubExpr) {
+ bool ControlsExit) {
// We handle only IV < Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
@@ -6631,7 +7101,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
- bool NoWrap = !IsSubExpr &&
+ bool NoWrap = ControlsExit &&
IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
const SCEV *Stride = IV->getStepRecurrence(*this);
@@ -6651,9 +7121,19 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
: ICmpInst::ICMP_ULT;
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
- End = IsSigned ? getSMaxExpr(RHS, Start)
- : getUMaxExpr(RHS, Start);
+ if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) {
+ const SCEV *Diff = getMinusSCEV(RHS, Start);
+    // If NoWrap is set, the increment cannot overflow. In that case, if
+    // RHS - Start is a constant, we do not need a max operation: the result
+    // can be determined statically.
+ if (NoWrap && isa<SCEVConstant>(Diff)) {
+ APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
+ if (D.isNegative())
+ End = Start;
+ } else
+ End = IsSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+ }
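Concretely, with NoWrap known and, say, Start = 10 and RHS = 5, the difference RHS - Start = -5 is a negative constant, so End collapses to Start and the computed backedge-taken count is 0 without materializing an smax/umax; the greater-than case below mirrors this with the sign test inverted.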
const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
@@ -6684,13 +7164,13 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, /*MustExit=*/true);
+ return ExitLimit(BECount, MaxBECount);
}
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
- bool IsSubExpr) {
+ bool ControlsExit) {
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
@@ -6701,7 +7181,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
- bool NoWrap = !IsSubExpr &&
+ bool NoWrap = ControlsExit &&
IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
@@ -6722,9 +7202,19 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
- End = IsSigned ? getSMinExpr(RHS, Start)
- : getUMinExpr(RHS, Start);
+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
+ const SCEV *Diff = getMinusSCEV(RHS, Start);
+    // If NoWrap is set, the increment cannot overflow. In that case, if
+    // RHS - Start is a constant, we do not need a max operation: the result
+    // can be determined statically.
+ if (NoWrap && isa<SCEVConstant>(Diff)) {
+ APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue();
+ if (!D.isNegative())
+ End = Start;
+ } else
+ End = IsSigned ? getSMinExpr(RHS, Start)
+ : getUMinExpr(RHS, Start);
+ }
const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
@@ -6756,7 +7246,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, /*MustExit=*/true);
+ return ExitLimit(BECount, MaxBECount);
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -6984,268 +7474,6 @@ void SCEVAddRecExpr::collectParametricTerms(
});
}
-static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue();
- APInt B = C2->getValue()->getValue();
- uint32_t ABW = A.getBitWidth();
- uint32_t BBW = B.getBitWidth();
-
- if (ABW > BBW)
- B = B.sext(ABW);
- else if (ABW < BBW)
- A = A.sext(BBW);
-
- return APIntOps::srem(A, B);
-}
-
-static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
- APInt A = C1->getValue()->getValue();
- APInt B = C2->getValue()->getValue();
- uint32_t ABW = A.getBitWidth();
- uint32_t BBW = B.getBitWidth();
-
- if (ABW > BBW)
- B = B.sext(ABW);
- else if (ABW < BBW)
- A = A.sext(BBW);
-
- return APIntOps::sdiv(A, B);
-}
-
-namespace {
-struct FindSCEVSize {
- int Size;
- FindSCEVSize() : Size(0) {}
-
- bool follow(const SCEV *S) {
- ++Size;
- // Keep looking at all operands of S.
- return true;
- }
- bool isDone() const {
- return false;
- }
-};
-}
-
-// Returns the size of the SCEV S.
-static inline int sizeOfSCEV(const SCEV *S) {
- FindSCEVSize F;
- SCEVTraversal<FindSCEVSize> ST(F);
- ST.visitAll(S);
- return F.Size;
-}
-
-namespace {
-
-struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
-public:
- // Computes the Quotient and Remainder of the division of Numerator by
- // Denominator.
- static void divide(ScalarEvolution &SE, const SCEV *Numerator,
- const SCEV *Denominator, const SCEV **Quotient,
- const SCEV **Remainder) {
- assert(Numerator && Denominator && "Uninitialized SCEV");
-
- SCEVDivision D(SE, Numerator, Denominator);
-
- // Check for the trivial case here to avoid having to check for it in the
- // rest of the code.
- if (Numerator == Denominator) {
- *Quotient = D.One;
- *Remainder = D.Zero;
- return;
- }
-
- if (Numerator->isZero()) {
- *Quotient = D.Zero;
- *Remainder = D.Zero;
- return;
- }
-
- // Split the Denominator when it is a product.
- if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
- const SCEV *Q, *R;
- *Quotient = Numerator;
- for (const SCEV *Op : T->operands()) {
- divide(SE, *Quotient, Op, &Q, &R);
- *Quotient = Q;
-
- // Bail out when the Numerator is not divisible by one of the terms of
- // the Denominator.
- if (!R->isZero()) {
- *Quotient = D.Zero;
- *Remainder = Numerator;
- return;
- }
- }
- *Remainder = D.Zero;
- return;
- }
-
- D.visit(Numerator);
- *Quotient = D.Quotient;
- *Remainder = D.Remainder;
- }
-
- SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator)
- : SE(S), Denominator(Denominator) {
- Zero = SE.getConstant(Denominator->getType(), 0);
- One = SE.getConstant(Denominator->getType(), 1);
-
- // By default, we don't know how to divide Expr by Denominator.
- // Providing the default here simplifies the rest of the code.
- Quotient = Zero;
- Remainder = Numerator;
- }
-
- // Except in the trivial case described above, we do not know how to divide
- // Expr by Denominator for the following functions with empty implementation.
- void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
- void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
- void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
- void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
- void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
- void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
- void visitUnknown(const SCEVUnknown *Numerator) {}
- void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
-
- void visitConstant(const SCEVConstant *Numerator) {
- if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
- Quotient = SE.getConstant(sdiv(Numerator, D));
- Remainder = SE.getConstant(srem(Numerator, D));
- return;
- }
- }
-
- void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
- const SCEV *StartQ, *StartR, *StepQ, *StepR;
- assert(Numerator->isAffine() && "Numerator should be affine");
- divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
- divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
- Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
- Numerator->getNoWrapFlags());
- Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
- Numerator->getNoWrapFlags());
- }
-
- void visitAddExpr(const SCEVAddExpr *Numerator) {
- SmallVector<const SCEV *, 2> Qs, Rs;
- Type *Ty = Denominator->getType();
-
- for (const SCEV *Op : Numerator->operands()) {
- const SCEV *Q, *R;
- divide(SE, Op, Denominator, &Q, &R);
-
- // Bail out if types do not match.
- if (Ty != Q->getType() || Ty != R->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
-
- Qs.push_back(Q);
- Rs.push_back(R);
- }
-
- if (Qs.size() == 1) {
- Quotient = Qs[0];
- Remainder = Rs[0];
- return;
- }
-
- Quotient = SE.getAddExpr(Qs);
- Remainder = SE.getAddExpr(Rs);
- }
-
- void visitMulExpr(const SCEVMulExpr *Numerator) {
- SmallVector<const SCEV *, 2> Qs;
- Type *Ty = Denominator->getType();
-
- bool FoundDenominatorTerm = false;
- for (const SCEV *Op : Numerator->operands()) {
- // Bail out if types do not match.
- if (Ty != Op->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
-
- if (FoundDenominatorTerm) {
- Qs.push_back(Op);
- continue;
- }
-
- // Check whether Denominator divides one of the product operands.
- const SCEV *Q, *R;
- divide(SE, Op, Denominator, &Q, &R);
- if (!R->isZero()) {
- Qs.push_back(Op);
- continue;
- }
-
- // Bail out if types do not match.
- if (Ty != Q->getType()) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
-
- FoundDenominatorTerm = true;
- Qs.push_back(Q);
- }
-
- if (FoundDenominatorTerm) {
- Remainder = Zero;
- if (Qs.size() == 1)
- Quotient = Qs[0];
- else
- Quotient = SE.getMulExpr(Qs);
- return;
- }
-
- if (!isa<SCEVUnknown>(Denominator)) {
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
-
- // The Remainder is obtained by replacing Denominator by 0 in Numerator.
- ValueToValueMap RewriteMap;
- RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
- cast<SCEVConstant>(Zero)->getValue();
- Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
-
- if (Remainder->isZero()) {
- // The Quotient is obtained by replacing Denominator by 1 in Numerator.
- RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
- cast<SCEVConstant>(One)->getValue();
- Quotient =
- SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
- return;
- }
-
- // Quotient is (Numerator - Remainder) divided by Denominator.
- const SCEV *Q, *R;
- const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
- if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
- // This SCEV does not seem to simplify: fail the division here.
- Quotient = Zero;
- Remainder = Numerator;
- return;
- }
- divide(SE, Diff, Denominator, &Q, &R);
- assert(R == Zero &&
- "(Numerator - Remainder) should evenly divide Denominator");
- Quotient = Q;
- }
-
-private:
- ScalarEvolution &SE;
- const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
-};
-}
-
static bool findArrayDimensionsRec(ScalarEvolution &SE,
SmallVectorImpl<const SCEV *> &Terms,
SmallVectorImpl<const SCEV *> &Sizes) {
@@ -7609,7 +7837,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
// that until everything else is done.
if (U == Old)
continue;
- if (!Visited.insert(U))
+ if (!Visited.insert(U).second)
continue;
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
@@ -7638,6 +7866,7 @@ ScalarEvolution::ScalarEvolution()
bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
LI = &getAnalysis<LoopInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
DL = DLP ? &DLP->getDataLayout() : nullptr;
@@ -7678,6 +7907,7 @@ void ScalarEvolution::releaseMemory() {
void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequiredTransitive<LoopInfo>();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfo>();
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 6933f74150fd..5c339eecdd62 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -162,10 +162,10 @@ ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
if (alias(Location(AO ? AO : LocA.Ptr,
AO ? +UnknownSize : LocA.Size,
- AO ? nullptr : LocA.TBAATag),
+ AO ? AAMDNodes() : LocA.AATags),
Location(BO ? BO : LocB.Ptr,
BO ? +UnknownSize : LocB.Size,
- BO ? nullptr : LocB.TBAATag)) == NoAlias)
+ BO ? AAMDNodes() : LocB.AATags)) == NoAlias)
return NoAlias;
// Forward the query to the next analysis.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 968c619a48dd..59f19a002ecc 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1443,7 +1443,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Constant *One = ConstantInt::get(Ty, 1);
for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
BasicBlock *HP = *HPI;
- if (!PredSeen.insert(HP)) {
+ if (!PredSeen.insert(HP).second) {
// There must be an incoming value for each predecessor, even the
// duplicates!
CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
@@ -1711,7 +1711,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// Fold constant phis. They may be congruent to other constant phis and
// would confuse the logic below that expects proper IVs.
- if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT)) {
+ if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AC)) {
Phi->replaceAllUsesWith(V);
DeadInsts.push_back(Phi);
++NumElim;
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 3ccefb01101d..b238fe43cc60 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -126,7 +126,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
// Normalized form: {-2,+,1,+,2}
// Denormalized form: {1,+,3,+,2}
//
- // However, denormalization would use the a different step expression than
+ // However, denormalization would use a different step expression than
// normalization (see getPostIncExpr), generating the wrong final
// expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
if (AR->isAffine() &&
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
new file mode 100644
index 000000000000..c6ea3af3d0c3
--- /dev/null
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -0,0 +1,245 @@
+//===- ScopedNoAliasAA.cpp - Scoped No-Alias Alias Analysis ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScopedNoAlias alias-analysis pass, which implements
+// metadata-based scoped no-alias support.
+//
+// Alias-analysis scopes are defined by an id (which can be a string or some
+// other metadata node), a domain node, and an optional descriptive string.
+// A domain is defined by an id (which can be a string or some other metadata
+// node), and an optional descriptive string.
+//
+// !dom0 = metadata !{ metadata !"domain of foo()" }
+// !scope1 = metadata !{ metadata !scope1, metadata !dom0, metadata !"scope 1" }
+// !scope2 = metadata !{ metadata !scope2, metadata !dom0, metadata !"scope 2" }
+//
+// Loads and stores can be tagged with an alias-analysis scope, and also, with
+// a noalias tag for a specific scope:
+//
+// ... = load %ptr1, !alias.scope !{ !scope1 }
+// ... = load %ptr2, !alias.scope !{ !scope1, !scope2 }, !noalias !{ !scope1 }
+//
+// When evaluating an aliasing query, if one of the instructions has a set of
+// noalias scopes in some domain that is a superset of the other instruction's
+// alias scopes in that domain, then the two memory accesses are assumed not to
+// alias.
+//
+//===----------------------------------------------------------------------===//
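To make the superset rule concrete with the scopes defined above: an access tagged !alias.scope !{ !scope1 } and a second access tagged !noalias !{ !scope1, !scope2 } are treated as not aliasing, since within domain !dom0 the noalias set {scope1, scope2} is a superset of the scope set {scope1}. Had the noalias list named only !scope2, the superset test would fail and the query would conservatively fall through to the next analysis.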
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// A handy option for disabling scoped no-alias functionality. The same effect
+// can also be achieved by stripping the associated metadata tags from IR, but
+// this option is sometimes more convenient.
+static cl::opt<bool>
+EnableScopedNoAlias("enable-scoped-noalias", cl::init(true));
+
+namespace {
+/// AliasScopeNode - This is a simple wrapper around an MDNode which provides
+/// a higher-level interface by hiding the details of how alias analysis
+/// information is encoded in its operands.
+class AliasScopeNode {
+ const MDNode *Node;
+
+public:
+ AliasScopeNode() : Node(0) {}
+ explicit AliasScopeNode(const MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this AliasScopeNode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getDomain - Get the MDNode for this AliasScopeNode's domain.
+ const MDNode *getDomain() const {
+ if (Node->getNumOperands() < 2)
+ return nullptr;
+ return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ }
+};
+
+/// ScopedNoAliasAA - This is a simple alias analysis
+/// implementation that uses scoped-noalias metadata to answer queries.
+class ScopedNoAliasAA : public ImmutablePass, public AliasAnalysis {
+public:
+ static char ID; // Class identification, replacement for typeinfo
+ ScopedNoAliasAA() : ImmutablePass(ID) {
+ initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry());
+ }
+
+ void initializePass() override { InitializeAliasAnalysis(this); }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ void *getAdjustedAnalysisPointer(const void *PI) override {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+protected:
+ bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
+ void collectMDInDomain(const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) const;
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ AliasResult alias(const Location &LocA, const Location &LocB) override;
+ bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
+ ModRefBehavior getModRefBehavior(const Function *F) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) override;
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) override;
+};
+} // End of anonymous namespace
+
+// Register this pass...
+char ScopedNoAliasAA::ID = 0;
+INITIALIZE_AG_PASS(ScopedNoAliasAA, AliasAnalysis, "scoped-noalias",
+ "Scoped NoAlias Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createScopedNoAliasAAPass() {
+ return new ScopedNoAliasAA();
+}
+
+void
+ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+void
+ScopedNoAliasAA::collectMDInDomain(const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) const {
+ for (unsigned i = 0, ie = List->getNumOperands(); i != ie; ++i)
+ if (const MDNode *MD = dyn_cast<MDNode>(List->getOperand(i)))
+ if (AliasScopeNode(MD).getDomain() == Domain)
+ Nodes.insert(MD);
+}
+
+bool
+ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes,
+ const MDNode *NoAlias) const {
+ if (!Scopes || !NoAlias)
+ return true;
+
+ // Collect the set of scope domains relevant to the noalias scopes.
+ SmallPtrSet<const MDNode *, 16> Domains;
+ for (unsigned i = 0, ie = NoAlias->getNumOperands(); i != ie; ++i)
+ if (const MDNode *NAMD = dyn_cast<MDNode>(NoAlias->getOperand(i)))
+ if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain())
+ Domains.insert(Domain);
+
+ // We alias unless, for some domain, the set of noalias scopes in that domain
+ // is a superset of the set of alias scopes in that domain.
+ for (const MDNode *Domain : Domains) {
+ SmallPtrSet<const MDNode *, 16> NANodes, ScopeNodes;
+ collectMDInDomain(NoAlias, Domain, NANodes);
+ collectMDInDomain(Scopes, Domain, ScopeNodes);
+ if (!ScopeNodes.size())
+ continue;
+
+ // To not alias, all of the nodes in ScopeNodes must be in NANodes.
+ bool FoundAll = true;
+ for (const MDNode *SMD : ScopeNodes)
+ if (!NANodes.count(SMD)) {
+ FoundAll = false;
+ break;
+ }
+
+ if (FoundAll)
+ return false;
+ }
+
+ return true;
+}
+
+AliasAnalysis::AliasResult
+ScopedNoAliasAA::alias(const Location &LocA, const Location &LocB) {
+ if (!EnableScopedNoAlias)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Get the attached MDNodes.
+ const MDNode *AScopes = LocA.AATags.Scope,
+ *BScopes = LocB.AATags.Scope;
+
+ const MDNode *ANoAlias = LocA.AATags.NoAlias,
+ *BNoAlias = LocB.AATags.NoAlias;
+
+ if (!mayAliasInScopes(AScopes, BNoAlias))
+ return NoAlias;
+
+ if (!mayAliasInScopes(BScopes, ANoAlias))
+ return NoAlias;
+
+ // If they may alias, chain to the next AliasAnalysis.
+ return AliasAnalysis::alias(LocA, LocB);
+}
+
+bool ScopedNoAliasAA::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+}
+
+AliasAnalysis::ModRefBehavior
+ScopedNoAliasAA::getModRefBehavior(ImmutableCallSite CS) {
+ return AliasAnalysis::getModRefBehavior(CS);
+}
+
+AliasAnalysis::ModRefBehavior
+ScopedNoAliasAA::getModRefBehavior(const Function *F) {
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ if (!EnableScopedNoAlias)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata(
+ LLVMContext::MD_noalias)))
+ return NoModRef;
+
+ if (!mayAliasInScopes(
+ CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ Loc.AATags.NoAlias))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ if (!EnableScopedNoAlias)
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+ if (!mayAliasInScopes(
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return NoModRef;
+
+ if (!mayAliasInScopes(
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope),
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias)))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h
new file mode 100644
index 000000000000..fd3fbc0d86ad
--- /dev/null
+++ b/lib/Analysis/StratifiedSets.h
@@ -0,0 +1,692 @@
+//===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_STRATIFIEDSETS_H
+#define LLVM_ADT_STRATIFIEDSETS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include <bitset>
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+// \brief An index into Stratified Sets.
+typedef unsigned StratifiedIndex;
+// NOTE: ^ This can't be a short -- bootstrapping clang has a case where
+// ~1M sets exist.
+
+// \brief Container of information related to a value in a StratifiedSet.
+struct StratifiedInfo {
+ StratifiedIndex Index;
+ // For field sensitivity, etc. we can tack attributes on to this struct.
+};
+
+// The number of attributes that StratifiedAttrs should contain. Attributes
+// are described below, and 32 was an arbitrary choice; it fits nicely into a
+// single 32-bit word (we use a bitset for StratifiedAttrs).
+static const unsigned NumStratifiedAttrs = 32;
+
+// These are attributes that the users of StratifiedSets/StratifiedSetsBuilder
+// may use for various purposes. These also have the special property that
+// they are merged down: if set A is above set B, and one decides to set an
+// attribute in set A, then the attribute will automatically be set in set B.
+typedef std::bitset<NumStratifiedAttrs> StratifiedAttrs;
+
+// \brief A "link" between two StratifiedSets.
+struct StratifiedLink {
+ // \brief This is a value used to signify "does not exist" where
+ // the StratifiedIndex type is used. This is used instead of
+ // Optional<StratifiedIndex> because Optional<StratifiedIndex> would
+ // eat up a considerable amount of extra memory, after struct
+ // padding/alignment is taken into account.
+ static const StratifiedIndex SetSentinel;
+
+ // \brief The index for the set "above" current
+ StratifiedIndex Above;
+
+ // \brief The link for the set "below" current
+ StratifiedIndex Below;
+
+ // \brief Attributes for these StratifiedSets.
+ StratifiedAttrs Attrs;
+
+ StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {}
+
+ bool hasBelow() const { return Below != SetSentinel; }
+ bool hasAbove() const { return Above != SetSentinel; }
+
+ void clearBelow() { Below = SetSentinel; }
+ void clearAbove() { Above = SetSentinel; }
+};
+
+// \brief These are stratified sets, as described in "Fast algorithms for
+// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M
+// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets
+// of Value*s. If two Value*s are in the same set, or if both sets have
+// overlapping attributes, then the Value*s are said to alias.
+//
+// Sets may be related by position, meaning that one set may be considered as
+// above or below another. In CFL Alias Analysis, this gives us an indication
+// of how two variables are related; if the set of variable A is below a set
+// containing variable B, then at some point, a variable that has interacted
+// with B (or B itself) was either used in order to extract the variable A, or
+// was used as storage of variable A.
+//
+// Sets may also have attributes (as noted above). These attributes are
+// generally used for noting whether a variable in the set has interacted with
+// a variable whose origins we don't quite know (i.e. globals/arguments), or if
+// the variable may have had operations performed on it (modified in a function
+// call). All attributes that exist in a set A must exist in all sets marked as
+// below set A.
+template <typename T> class StratifiedSets {
+public:
+ StratifiedSets() {}
+
+ StratifiedSets(DenseMap<T, StratifiedInfo> Map,
+ std::vector<StratifiedLink> Links)
+ : Values(std::move(Map)), Links(std::move(Links)) {}
+
+ StratifiedSets(StratifiedSets<T> &&Other) { *this = std::move(Other); }
+
+ StratifiedSets &operator=(StratifiedSets<T> &&Other) {
+ Values = std::move(Other.Values);
+ Links = std::move(Other.Links);
+ return *this;
+ }
+
+ Optional<StratifiedInfo> find(const T &Elem) const {
+ auto Iter = Values.find(Elem);
+ if (Iter == Values.end()) {
+ return NoneType();
+ }
+ return Iter->second;
+ }
+
+ const StratifiedLink &getLink(StratifiedIndex Index) const {
+ assert(inbounds(Index));
+ return Links[Index];
+ }
+
+private:
+ DenseMap<T, StratifiedInfo> Values;
+ std::vector<StratifiedLink> Links;
+
+ bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); }
+};
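+
+// An illustrative query (hypothetical names, not part of this patch):
+//
+//   auto MaybeInfo = Sets.find(V);
+//   if (MaybeInfo.hasValue()) {
+//     const StratifiedLink &Link = Sets.getLink(MaybeInfo->Index);
+//     bool HasParent = Link.hasAbove();
+//   }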
+
+// \brief Generic Builder class that produces StratifiedSets instances.
+//
+// The goal of this builder is to efficiently produce correct StratifiedSets
+// instances. To this end, we use a few tricks:
+// > Set chains (A method for linking sets together)
+// > Set remaps (A method for marking a set as an alias [irony?] of another)
+//
+// ==== Set chains ====
+// This builder has a notion of some value A being above, below, or with some
+// other value B:
+// > The `A above B` relationship implies that there is a reference edge going
+// from A to B. Namely, it notes that A can store anything in B's set.
+// > The `A below B` relationship is the opposite of `A above B`. It implies
+// that there's a dereference edge going from A to B.
+// > The `A with B` relationship states that there's an assignment edge going
+// from A to B, and that A and B should be treated as equals.
+//
+// As an example, take the following code snippet:
+//
+// %a = alloca i32, align 4
+// %ap = alloca i32*, align 8
+// %app = alloca i32**, align 8
+// store %a, %ap
+// store %ap, %app
+// %aw = getelementptr %ap, 0
+//
+// Given this, the following relations exist:
+// - %a below %ap & %ap above %a
+// - %ap below %app & %app above %ap
+// - %aw with %ap & %ap with %aw
+//
+// These relations produce the following sets:
+// [{%a}, {%ap, %aw}, {%app}]
+//
+// ...Which states that the only MayAlias relationship in the above program is
+// between %ap and %aw.
+//
+// Life gets more complicated when we actually have logic in our programs. So,
+// we must either remove this logic from our programs, or make concessions for
+// it in our AA algorithms. In this case, we have decided to select the latter
+// option.
+//
+// First complication: Conditionals
+// Motivation:
+// %ad = alloca int, align 4
+// %a = alloca int*, align 8
+// %b = alloca int*, align 8
+// %bp = alloca int**, align 8
+// %c = call i1 @SomeFunc()
+// %k = select %c, %ad, %bp
+// store %ad, %a
+// store %b, %bp
+//
+// %k has 'with' edges to both %a and %b, which ordinarily would not be linked
+// together. So, we merge the set that contains %a with the set that contains
+// %b. We then recursively merge the set above %a with the set above %b, and
+// the set below %a with the set below %b, etc. Ultimately, the sets for this
+// program would end up like: {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below
+// {%a, %b, %k}, which in turn is below {%bp}.
+//
+// Second complication: Arbitrary casts
+// Motivation:
+// %ip = alloca int*, align 8
+// %ipp = alloca int**, align 8
+//   %i = bitcast %ipp to int
+// store %ip, %ipp
+// store %i, %ip
+//
+// This is impossible to construct with any of the rules above, because a set
+// containing both %i and %ipp is supposed to exist, the set with %i is
+// supposed to be below the set with %ip, and the set with %ip is supposed to
+// be below the set with %ipp. Because we don't allow circular relationships
+// like this, we merge all concerned sets into one. So, the above code would
+// generate a single StratifiedSet: {%ip, %ipp, %i}.
+//
+// ==== Set remaps ====
+// More of an implementation detail than anything -- when merging sets, we need
+// to update the numbers of all of the elements mapped to those sets. Rather
+// than doing this at each merge, we note in the BuilderLink structure that a
+// remap has occurred, and use this information so we can defer renumbering set
+// elements until build time.
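+//
+// A minimal usage sketch (illustrative only; the element type and the
+// attribute number are made up):
+//
+//   StratifiedSetsBuilder<Value *> Builder;
+//   Builder.add(A);              // new set: {A}
+//   Builder.addBelow(A, B);      // B goes into a set below A's
+//   Builder.addWith(B, C);       // C joins B's set: {A}, {B, C}
+//   Builder.noteAttribute(C, 0); // attribute 0 is set on {B, C}
+//   StratifiedSets<Value *> Sets = Builder.build();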
+template <typename T> class StratifiedSetsBuilder {
+ // \brief Represents a Stratified Set, with information about the Stratified
+ // Set above it, the set below it, and whether the current set has been
+ // remapped to another.
+ struct BuilderLink {
+ const StratifiedIndex Number;
+
+ BuilderLink(StratifiedIndex N) : Number(N) {
+ Remap = StratifiedLink::SetSentinel;
+ }
+
+ bool hasAbove() const {
+ assert(!isRemapped());
+ return Link.hasAbove();
+ }
+
+ bool hasBelow() const {
+ assert(!isRemapped());
+ return Link.hasBelow();
+ }
+
+ void setBelow(StratifiedIndex I) {
+ assert(!isRemapped());
+ Link.Below = I;
+ }
+
+ void setAbove(StratifiedIndex I) {
+ assert(!isRemapped());
+ Link.Above = I;
+ }
+
+ void clearBelow() {
+ assert(!isRemapped());
+ Link.clearBelow();
+ }
+
+ void clearAbove() {
+ assert(!isRemapped());
+ Link.clearAbove();
+ }
+
+ StratifiedIndex getBelow() const {
+ assert(!isRemapped());
+ assert(hasBelow());
+ return Link.Below;
+ }
+
+ StratifiedIndex getAbove() const {
+ assert(!isRemapped());
+ assert(hasAbove());
+ return Link.Above;
+ }
+
+ StratifiedAttrs &getAttrs() {
+ assert(!isRemapped());
+ return Link.Attrs;
+ }
+
+ void setAttr(unsigned index) {
+ assert(!isRemapped());
+ assert(index < NumStratifiedAttrs);
+ Link.Attrs.set(index);
+ }
+
+ void setAttrs(const StratifiedAttrs &other) {
+ assert(!isRemapped());
+ Link.Attrs |= other;
+ }
+
+ bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; }
+
+ // \brief For initial remapping to another set
+ void remapTo(StratifiedIndex Other) {
+ assert(!isRemapped());
+ Remap = Other;
+ }
+
+ StratifiedIndex getRemapIndex() const {
+ assert(isRemapped());
+ return Remap;
+ }
+
+ // \brief Should only be called when we're already remapped.
+ void updateRemap(StratifiedIndex Other) {
+ assert(isRemapped());
+ Remap = Other;
+ }
+
+ // \brief Prefer the above functions to calling things directly on what's
+ // returned from this -- they guard against unexpected calls when the
+ // current BuilderLink is remapped.
+ const StratifiedLink &getLink() const { return Link; }
+
+ private:
+ StratifiedLink Link;
+ StratifiedIndex Remap;
+ };
+
+ // \brief This function performs all of the set unioning/value renumbering
+ // that we've been putting off, and generates a vector<StratifiedLink> that
+ // may be placed in a StratifiedSets instance.
+ void finalizeSets(std::vector<StratifiedLink> &StratLinks) {
+ DenseMap<StratifiedIndex, StratifiedIndex> Remaps;
+ for (auto &Link : Links) {
+ if (Link.isRemapped()) {
+ continue;
+ }
+
+ StratifiedIndex Number = StratLinks.size();
+ Remaps.insert(std::make_pair(Link.Number, Number));
+ StratLinks.push_back(Link.getLink());
+ }
+
+ for (auto &Link : StratLinks) {
+ if (Link.hasAbove()) {
+ auto &Above = linksAt(Link.Above);
+ auto Iter = Remaps.find(Above.Number);
+ assert(Iter != Remaps.end());
+ Link.Above = Iter->second;
+ }
+
+ if (Link.hasBelow()) {
+ auto &Below = linksAt(Link.Below);
+ auto Iter = Remaps.find(Below.Number);
+ assert(Iter != Remaps.end());
+ Link.Below = Iter->second;
+ }
+ }
+
+ for (auto &Pair : Values) {
+ auto &Info = Pair.second;
+ auto &Link = linksAt(Info.Index);
+ auto Iter = Remaps.find(Link.Number);
+ assert(Iter != Remaps.end());
+ Info.Index = Iter->second;
+ }
+ }
+
+  // \brief There's a guarantee in StratifiedLink that all attribute bits set
+  // in a link's Attrs will also be set in the Attrs of all links "below" it.
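+  // For example (illustrative): in a chain of sets 0 -> 1 -> 2 (0 on top),
+  // with attribute 3 set on set 0 and attribute 5 set on set 1, propagation
+  // leaves set 0 with {3}, set 1 with {3, 5}, and set 2 with {3, 5}.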
+ static void propagateAttrs(std::vector<StratifiedLink> &Links) {
+ const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) {
+ const auto *Link = &Links[Idx];
+ while (Link->hasAbove()) {
+ Idx = Link->Above;
+ Link = &Links[Idx];
+ }
+ return Idx;
+ };
+
+ SmallSet<StratifiedIndex, 16> Visited;
+ for (unsigned I = 0, E = Links.size(); I < E; ++I) {
+ auto CurrentIndex = getHighestParentAbove(I);
+ if (!Visited.insert(CurrentIndex).second) {
+ continue;
+ }
+
+ while (Links[CurrentIndex].hasBelow()) {
+ auto &CurrentBits = Links[CurrentIndex].Attrs;
+ auto NextIndex = Links[CurrentIndex].Below;
+ auto &NextBits = Links[NextIndex].Attrs;
+ NextBits |= CurrentBits;
+ CurrentIndex = NextIndex;
+ }
+ }
+ }
+
+public:
+ // \brief Builds a StratifiedSet from the information we've been given since
+ // either construction or the prior build() call.
+ StratifiedSets<T> build() {
+ std::vector<StratifiedLink> StratLinks;
+ finalizeSets(StratLinks);
+ propagateAttrs(StratLinks);
+ Links.clear();
+ return StratifiedSets<T>(std::move(Values), std::move(StratLinks));
+ }
+
+ std::size_t size() const { return Values.size(); }
+ std::size_t numSets() const { return Links.size(); }
+
+ bool has(const T &Elem) const { return get(Elem).hasValue(); }
+
+ bool add(const T &Main) {
+ if (get(Main).hasValue())
+ return false;
+
+ auto NewIndex = getNewUnlinkedIndex();
+ return addAtMerging(Main, NewIndex);
+ }
+
+  // \brief Restructures the stratified sets as necessary so that "ToAdd" ends
+  // up in a set above "Main". There are some cases where this is not possible
+  // (see above), so we merge them such that ToAdd and Main are in the same
+  // set.
+ bool addAbove(const T &Main, const T &ToAdd) {
+ assert(has(Main));
+ auto Index = *indexOf(Main);
+ if (!linksAt(Index).hasAbove())
+ addLinkAbove(Index);
+
+ auto Above = linksAt(Index).getAbove();
+ return addAtMerging(ToAdd, Above);
+ }
+
+  // \brief Restructures the stratified sets as necessary so that "ToAdd" ends
+  // up in a set below "Main". There are some cases where this is not possible
+  // (see above), so we merge them such that ToAdd and Main are in the same
+  // set.
+ bool addBelow(const T &Main, const T &ToAdd) {
+ assert(has(Main));
+ auto Index = *indexOf(Main);
+ if (!linksAt(Index).hasBelow())
+ addLinkBelow(Index);
+
+ auto Below = linksAt(Index).getBelow();
+ return addAtMerging(ToAdd, Below);
+ }
+
+ bool addWith(const T &Main, const T &ToAdd) {
+ assert(has(Main));
+ auto MainIndex = *indexOf(Main);
+ return addAtMerging(ToAdd, MainIndex);
+ }
+
+ void noteAttribute(const T &Main, unsigned AttrNum) {
+ assert(has(Main));
+    assert(AttrNum < NumStratifiedAttrs);
+ auto *Info = *get(Main);
+ auto &Link = linksAt(Info->Index);
+ Link.setAttr(AttrNum);
+ }
+
+ void noteAttributes(const T &Main, const StratifiedAttrs &NewAttrs) {
+ assert(has(Main));
+ auto *Info = *get(Main);
+ auto &Link = linksAt(Info->Index);
+ Link.setAttrs(NewAttrs);
+ }
+
+ StratifiedAttrs getAttributes(const T &Main) {
+ assert(has(Main));
+ auto *Info = *get(Main);
+ auto *Link = &linksAt(Info->Index);
+ auto Attrs = Link->getAttrs();
+ while (Link->hasAbove()) {
+ Link = &linksAt(Link->getAbove());
+ Attrs |= Link->getAttrs();
+ }
+
+ return Attrs;
+ }
+
+ bool getAttribute(const T &Main, unsigned AttrNum) {
+    assert(AttrNum < NumStratifiedAttrs);
+ auto Attrs = getAttributes(Main);
+ return Attrs[AttrNum];
+ }
+
+ // \brief Gets the attributes that have been applied to the set that Main
+ // belongs to. It ignores attributes in any sets above the one that Main
+ // resides in.
+ StratifiedAttrs getRawAttributes(const T &Main) {
+ assert(has(Main));
+ auto *Info = *get(Main);
+ auto &Link = linksAt(Info->Index);
+ return Link.getAttrs();
+ }
+
+ // \brief Gets an attribute from the attributes that have been applied to the
+ // set that Main belongs to. It ignores attributes in any sets above the one
+ // that Main resides in.
+ bool getRawAttribute(const T &Main, unsigned AttrNum) {
+    assert(AttrNum < NumStratifiedAttrs);
+ auto Attrs = getRawAttributes(Main);
+ return Attrs[AttrNum];
+ }
+
+private:
+ DenseMap<T, StratifiedInfo> Values;
+ std::vector<BuilderLink> Links;
+
+ // \brief Adds the given element at the given index, merging sets if
+ // necessary.
+ bool addAtMerging(const T &ToAdd, StratifiedIndex Index) {
+ StratifiedInfo Info = {Index};
+ auto Pair = Values.insert(std::make_pair(ToAdd, Info));
+ if (Pair.second)
+ return true;
+
+ auto &Iter = Pair.first;
+ auto &IterSet = linksAt(Iter->second.Index);
+ auto &ReqSet = linksAt(Index);
+
+ // Failed to add where we wanted to. Merge the sets.
+ if (&IterSet != &ReqSet)
+ merge(IterSet.Number, ReqSet.Number);
+
+ return false;
+ }
+
+ // \brief Gets the BuilderLink at the given index, taking set remapping into
+ // account.
+ BuilderLink &linksAt(StratifiedIndex Index) {
+ auto *Start = &Links[Index];
+ if (!Start->isRemapped())
+ return *Start;
+
+ auto *Current = Start;
+ while (Current->isRemapped())
+ Current = &Links[Current->getRemapIndex()];
+
+ auto NewRemap = Current->Number;
+
+ // Run through everything that has yet to be updated, and update them to
+ // remap to NewRemap
+ Current = Start;
+ while (Current->isRemapped()) {
+ auto *Next = &Links[Current->getRemapIndex()];
+ Current->updateRemap(NewRemap);
+ Current = Next;
+ }
+
+ return *Current;
+ }
+
+  // \brief Merges two sets into one another. Assumes that these sets are not
+  // already one and the same.
+ void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) {
+ assert(inbounds(Idx1) && inbounds(Idx2));
+ assert(&linksAt(Idx1) != &linksAt(Idx2) &&
+ "Merging a set into itself is not allowed");
+
+    // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to
+    // merge both the given sets, and all sets between them, into one.
+ if (tryMergeUpwards(Idx1, Idx2))
+ return;
+
+ if (tryMergeUpwards(Idx2, Idx1))
+ return;
+
+ // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`.
+ // We therefore need to merge the two chains together.
+ mergeDirect(Idx1, Idx2);
+ }
+
+ // \brief Merges two sets assuming that the set at `Idx1` is unreachable from
+ // traversing above or below the set at `Idx2`.
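+  // (Illustrative sketch: merging the chain [Y0 above Y1 above Y2] into the
+  // chain [X0 above X1] pairs Y0 with X0 and Y1 with X1, remapping each Y
+  // into its X, then splices the leftover Y2 in below the merged chain.)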
+ void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) {
+ assert(inbounds(Idx1) && inbounds(Idx2));
+
+ auto *LinksInto = &linksAt(Idx1);
+ auto *LinksFrom = &linksAt(Idx2);
+ // Merging everything above LinksInto then proceeding to merge everything
+ // below LinksInto becomes problematic, so we go as far "up" as possible!
+ while (LinksInto->hasAbove() && LinksFrom->hasAbove()) {
+ LinksInto = &linksAt(LinksInto->getAbove());
+ LinksFrom = &linksAt(LinksFrom->getAbove());
+ }
+
+ if (LinksFrom->hasAbove()) {
+ LinksInto->setAbove(LinksFrom->getAbove());
+ auto &NewAbove = linksAt(LinksInto->getAbove());
+ NewAbove.setBelow(LinksInto->Number);
+ }
+
+ // Merging strategy:
+ // > If neither has links below, stop.
+ // > If only `LinksInto` has links below, stop.
+ // > If only `LinksFrom` has links below, reset `LinksInto.Below` to
+ // match `LinksFrom.Below`
+    //  > If both have links below, deal with those next.
+ while (LinksInto->hasBelow() && LinksFrom->hasBelow()) {
+ auto &FromAttrs = LinksFrom->getAttrs();
+ LinksInto->setAttrs(FromAttrs);
+
+ // Remap needs to happen after getBelow(), but before
+ // assignment of LinksFrom
+ auto *NewLinksFrom = &linksAt(LinksFrom->getBelow());
+ LinksFrom->remapTo(LinksInto->Number);
+ LinksFrom = NewLinksFrom;
+ LinksInto = &linksAt(LinksInto->getBelow());
+ }
+
+ if (LinksFrom->hasBelow()) {
+ LinksInto->setBelow(LinksFrom->getBelow());
+ auto &NewBelow = linksAt(LinksInto->getBelow());
+ NewBelow.setAbove(LinksInto->Number);
+ }
+
+ LinksFrom->remapTo(LinksInto->Number);
+ }
+
+ // \brief Checks to see if lowerIndex is at a level lower than upperIndex.
+ // If so, it will merge lowerIndex with upperIndex (and all of the sets
+ // between) and return true. Otherwise, it will return false.
+ bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) {
+ assert(inbounds(LowerIndex) && inbounds(UpperIndex));
+ auto *Lower = &linksAt(LowerIndex);
+ auto *Upper = &linksAt(UpperIndex);
+ if (Lower == Upper)
+ return true;
+
+ SmallVector<BuilderLink *, 8> Found;
+ auto *Current = Lower;
+ auto Attrs = Current->getAttrs();
+ while (Current->hasAbove() && Current != Upper) {
+ Found.push_back(Current);
+ Attrs |= Current->getAttrs();
+ Current = &linksAt(Current->getAbove());
+ }
+
+ if (Current != Upper)
+ return false;
+
+ Upper->setAttrs(Attrs);
+
+ if (Lower->hasBelow()) {
+ auto NewBelowIndex = Lower->getBelow();
+ Upper->setBelow(NewBelowIndex);
+ auto &NewBelow = linksAt(NewBelowIndex);
+ NewBelow.setAbove(UpperIndex);
+ } else {
+ Upper->clearBelow();
+ }
+
+ for (const auto &Ptr : Found)
+ Ptr->remapTo(Upper->Number);
+
+ return true;
+ }
+
+ Optional<const StratifiedInfo *> get(const T &Val) const {
+ auto Result = Values.find(Val);
+ if (Result == Values.end())
+ return NoneType();
+ return &Result->second;
+ }
+
+ Optional<StratifiedInfo *> get(const T &Val) {
+ auto Result = Values.find(Val);
+ if (Result == Values.end())
+ return NoneType();
+ return &Result->second;
+ }
+
+ Optional<StratifiedIndex> indexOf(const T &Val) {
+ auto MaybeVal = get(Val);
+ if (!MaybeVal.hasValue())
+ return NoneType();
+ auto *Info = *MaybeVal;
+ auto &Link = linksAt(Info->Index);
+ return Link.Number;
+ }
+
+ StratifiedIndex addLinkBelow(StratifiedIndex Set) {
+ auto At = addLinks();
+ Links[Set].setBelow(At);
+ Links[At].setAbove(Set);
+ return At;
+ }
+
+ StratifiedIndex addLinkAbove(StratifiedIndex Set) {
+ auto At = addLinks();
+ Links[At].setBelow(Set);
+ Links[Set].setAbove(At);
+ return At;
+ }
+
+ StratifiedIndex getNewUnlinkedIndex() { return addLinks(); }
+
+ StratifiedIndex addLinks() {
+ auto Link = Links.size();
+ Links.push_back(BuilderLink(Link));
+ return Link;
+ }
+
+ bool inbounds(StratifiedIndex N) const { return N < Links.size(); }
+};
+}
+#endif // LLVM_ADT_STRATIFIEDSETS_H
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index cdb0b79fd7ee..ef3909b326f9 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -87,9 +87,10 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return PrevTTI->isLoweredToCall(F);
}
-void TargetTransformInfo::getUnrollingPreferences(Loop *L,
- UnrollingPreferences &UP) const {
- PrevTTI->getUnrollingPreferences(L, UP);
+void
+TargetTransformInfo::getUnrollingPreferences(const Function *F, Loop *L,
+ UnrollingPreferences &UP) const {
+ PrevTTI->getUnrollingPreferences(F, L, UP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
@@ -100,6 +101,17 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
return PrevTTI->isLegalICmpImmediate(Imm);
}
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
+ int Consecutive) const {
+ return false;
+}
+
+bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
+ int Consecutive) const {
+ return false;
+}
+
bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
@@ -167,15 +179,16 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return PrevTTI->getRegisterBitWidth(Vector);
}
-unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
- return PrevTTI->getMaximumUnrollFactor();
+unsigned TargetTransformInfo::getMaxInterleaveFactor() const {
+ return PrevTTI->getMaxInterleaveFactor();
}
-unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
- Type *Ty,
- OperandValueKind Op1Info,
- OperandValueKind Op2Info) const {
- return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+unsigned TargetTransformInfo::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+ OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo) const {
+ return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
}
unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
@@ -206,7 +219,6 @@ unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) const {
return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
- ;
}
unsigned
@@ -230,6 +242,11 @@ unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
}
+unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys)
+ const {
+ return PrevTTI->getCostOfKeepingLiveOverCall(Tys);
+}
+
namespace {
struct NoTTI final : ImmutablePass, TargetTransformInfo {
@@ -239,7 +256,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
initializeNoTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override {
+ void initializePass() override {
// Note that this subclass is special, and must *not* call initializeTTI as
// it does not chain.
TopTTI = this;
@@ -248,7 +265,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
DL = DLP ? &DLP->getDataLayout() : nullptr;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
// Note that this subclass is special, and must *not* call
// TTI::getAnalysisUsage as it breaks the recursion.
}
@@ -257,7 +274,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo*)this;
return this;
@@ -385,6 +402,8 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
// FIXME: This is wrong for libc intrinsics.
return TCC_Basic;
+ case Intrinsic::annotation:
+ case Intrinsic::assume:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::invariant_start:
@@ -394,6 +413,10 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
case Intrinsic::objectsize:
case Intrinsic::ptr_annotation:
case Intrinsic::var_annotation:
+ case Intrinsic::experimental_gc_result_int:
+ case Intrinsic::experimental_gc_result_float:
+ case Intrinsic::experimental_gc_result_ptr:
+ case Intrinsic::experimental_gc_relocate:
// These intrinsics don't actually represent code after lowering.
return TCC_Free;
}
@@ -466,6 +489,8 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
// These will all likely lower to a single selection DAG node.
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
+ Name == "fmin" || Name == "fminf" || Name == "fminl" ||
+ Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
return false;
@@ -480,8 +505,8 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
return true;
}
- void getUnrollingPreferences(Loop *, UnrollingPreferences &) const override {
- }
+ void getUnrollingPreferences(const Function *, Loop *,
+ UnrollingPreferences &) const override {}
bool isLegalAddImmediate(int64_t Imm) const override {
return false;
@@ -558,12 +583,13 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
return 32;
}
- unsigned getMaximumUnrollFactor() const override {
+ unsigned getMaxInterleaveFactor() const override {
return 1;
}
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind) const override {
+ OperandValueKind, OperandValueProperties,
+ OperandValueProperties) const override {
return 1;
}
@@ -612,6 +638,11 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
unsigned getReductionCost(unsigned, Type *, bool) const override {
return 1;
}
+
+ unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override {
+ return 0;
+ }
+
};
} // end anonymous namespace
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index f36f6f8a8768..085ce920139b 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -167,7 +167,7 @@ namespace {
bool TypeIsImmutable() const {
if (Node->getNumOperands() < 3)
return false;
- ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
if (!CI)
return false;
return CI->getValue()[0];
@@ -194,7 +194,7 @@ namespace {
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
uint64_t getOffset() const {
- return cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
}
/// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
/// objects which are not modified (by any means) in the context where this
@@ -202,7 +202,7 @@ namespace {
bool TypeIsImmutable() const {
if (Node->getNumOperands() < 4)
return false;
- ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3));
+ ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
if (!CI)
return false;
return CI->getValue()[0];
@@ -233,8 +233,10 @@ namespace {
// Fast path for a scalar type node and a struct type node with a single
// field.
if (Node->getNumOperands() <= 3) {
- uint64_t Cur = Node->getNumOperands() == 2 ? 0 :
- cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ uint64_t Cur = Node->getNumOperands() == 2
+ ? 0
+ : mdconst::extract<ConstantInt>(Node->getOperand(2))
+ ->getZExtValue();
Offset -= Cur;
MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
if (!P)
@@ -246,8 +248,8 @@ namespace {
// the current offset is bigger than the given offset.
unsigned TheIdx = 0;
for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
- uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))->
- getZExtValue();
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
+ ->getZExtValue();
if (Cur > Offset) {
assert(Idx >= 3 &&
"TBAAStructTypeNode::getParent should have an offset match!");
@@ -258,8 +260,8 @@ namespace {
// Move along the last field.
if (TheIdx == 0)
TheIdx = Node->getNumOperands() - 2;
- uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))->
- getZExtValue();
+ uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
+ ->getZExtValue();
Offset -= Cur;
MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
if (!P)
@@ -454,9 +456,9 @@ TypeBasedAliasAnalysis::alias(const Location &LocA,
// Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
// be conservative.
- const MDNode *AM = LocA.TBAATag;
+ const MDNode *AM = LocA.AATags.TBAA;
if (!AM) return AliasAnalysis::alias(LocA, LocB);
- const MDNode *BM = LocB.TBAATag;
+ const MDNode *BM = LocB.AATags.TBAA;
if (!BM) return AliasAnalysis::alias(LocA, LocB);
// If they may alias, chain to the next AliasAnalysis.
@@ -472,7 +474,7 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
if (!EnableTBAA)
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
- const MDNode *M = Loc.TBAATag;
+ const MDNode *M = Loc.AATags.TBAA;
if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
// If this is an "immutable" type, we can assume the pointer is pointing
@@ -513,9 +515,9 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (!EnableTBAA)
return AliasAnalysis::getModRefInfo(CS, Loc);
- if (const MDNode *L = Loc.TBAATag)
+ if (const MDNode *L = Loc.AATags.TBAA)
if (const MDNode *M =
- CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(L, M))
return NoModRef;
@@ -529,9 +531,9 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
if (const MDNode *M1 =
- CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (const MDNode *M2 =
- CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
if (!Aliases(M1, M2))
return NoModRef;
@@ -608,6 +610,28 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return nullptr;
// We need to convert from a type node to a tag node.
Type *Int64 = IntegerType::get(A->getContext(), 64);
- Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
+ Metadata *Ops[3] = {Ret, Ret,
+ ConstantAsMetadata::get(ConstantInt::get(Int64, 0))};
return MDNode::get(A->getContext(), Ops);
}
+
+void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
+ if (Merge)
+ N.TBAA =
+ MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa));
+ else
+ N.TBAA = getMetadata(LLVMContext::MD_tbaa);
+
+ if (Merge)
+ N.Scope =
+ MDNode::intersect(N.Scope, getMetadata(LLVMContext::MD_alias_scope));
+ else
+ N.Scope = getMetadata(LLVMContext::MD_alias_scope);
+
+ if (Merge)
+ N.NoAlias =
+ MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias));
+ else
+ N.NoAlias = getMetadata(LLVMContext::MD_noalias);
+}
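+
+// Illustrative use (not part of this change): a transform replacing two loads
+// with one might combine their tags conservatively:
+//
+//   AAMDNodes AATags;
+//   LoadA->getAAMetadata(AATags);                 // take LoadA's tags
+//   LoadB->getAAMetadata(AATags, /*Merge=*/true); // merge in LoadB's tags
+//   NewLoad->setAAMetadata(AATags);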
+
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index b6a3c36b91fe..5d909177c078 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -14,12 +14,14 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
@@ -37,8 +39,8 @@ using namespace llvm::PatternMatch;
const unsigned MaxDepth = 6;
-/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
-/// unknown returns 0). For vector types, returns the element type's bitwidth.
+/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
+/// 0). For vector types, returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
@@ -46,10 +48,123 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
}
+// Many of these functions have internal versions that take an assumption
+// exclusion set. This is because of the potential for mutual recursion to
+// cause computeKnownBits to repeatedly visit the same assume intrinsic. The
+// classic case of this is assume(x = y), which will attempt to determine
+// bits in x from bits in y, which will attempt to determine bits in y from
+// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
+// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and
+// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so on.
+typedef SmallPtrSet<const Value *, 8> ExclInvsSet;
+
+namespace {
+// Simplifying using an assume can only be done in a particular control-flow
+// context (the context instruction provides that context). If an assume and
+// the context instruction are not in the same block then the DT helps in
+// figuring out if we can use it.
+struct Query {
+ ExclInvsSet ExclInvs;
+ AssumptionCache *AC;
+ const Instruction *CxtI;
+ const DominatorTree *DT;
+
+ Query(AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr)
+ : AC(AC), CxtI(CxtI), DT(DT) {}
+
+ Query(const Query &Q, const Value *NewExcl)
+ : ExclInvs(Q.ExclInvs), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT) {
+ ExclInvs.insert(NewExcl);
+ }
+};
+} // end anonymous namespace
+
+// Given the provided Value and, potentially, a context instruction, return
+// the preferred context instruction (if any).
+static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
+ // If we've been provided with a context instruction, then use that (provided
+ // it has been inserted).
+ if (CxtI && CxtI->getParent())
+ return CxtI;
+
+ // If the value is really an already-inserted instruction, then use that.
+ CxtI = dyn_cast<Instruction>(V);
+ if (CxtI && CxtI->getParent())
+ return CxtI;
+
+ return nullptr;
+}
+
+static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q);
+
+void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth,
+ Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q);
+
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth,
+ Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+ const Query &Q);
+
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
+ Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+ const Query &Q);
+
+bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::isKnownNonZero(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+static bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q);
+
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT) {
+ return ::MaskedValueIsZero(V, Mask, TD, Depth,
+ Query(AC, safeCxtI(V, CxtI), DT));
+}
+
+static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
+ unsigned Depth, const Query &Q);
+
+unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ return ::ComputeNumSignBits(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+}
+
static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth) {
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
if (!Add) {
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
// We know that the top bits of C-X are clear if X contains less bits
@@ -60,7 +175,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -76,55 +191,51 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
unsigned BitWidth = KnownZero.getBitWidth();
- // If one of the operands has trailing zeros, then the bits that the
- // other operand has in those bit positions will be preserved in the
- // result. For an add, this works with either operand. For a subtract,
- // this only works if the known zeros are in the right operand.
+ // If an initial sequence of bits in the result is not needed, the
+ // corresponding bits in the operands are not needed.
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- llvm::computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
- unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
-
- llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
- unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
-
- // Determine which operand has more trailing zeros, and use that
- // many bits from the other operand.
- if (LHSKnownZeroOut > RHSKnownZeroOut) {
- if (Add) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
- KnownZero |= KnownZero2 & Mask;
- KnownOne |= KnownOne2 & Mask;
- } else {
- // If the known zeros are in the left operand for a subtract,
- // fall back to the minimum known zeros in both operands.
- KnownZero |= APInt::getLowBitsSet(BitWidth,
- std::min(LHSKnownZeroOut,
- RHSKnownZeroOut));
- }
- } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
- KnownZero |= LHSKnownZero & Mask;
- KnownOne |= LHSKnownOne & Mask;
+ computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1, Q);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
+
+ // Carry in a 1 for a subtract, rather than a 0.
+ APInt CarryIn(BitWidth, 0);
+ if (!Add) {
+ // Sum = LHS + ~RHS + 1
+ std::swap(KnownZero2, KnownOne2);
+ CarryIn.setBit(0);
}
+ APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn;
+ APInt PossibleSumOne = LHSKnownOne + KnownOne2 + CarryIn;
+
+ // Compute known bits of the carry.
+ APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnownZero ^ KnownZero2);
+ APInt CarryKnownOne = PossibleSumOne ^ LHSKnownOne ^ KnownOne2;
+
+ // Compute set of known bits (where all three relevant bits are known).
+ APInt LHSKnown = LHSKnownZero | LHSKnownOne;
+ APInt RHSKnown = KnownZero2 | KnownOne2;
+ APInt CarryKnown = CarryKnownZero | CarryKnownOne;
+ APInt Known = LHSKnown & RHSKnown & CarryKnown;
+
+ assert((PossibleSumZero & Known) == (PossibleSumOne & Known) &&
+ "known bits of sum differ");
+
+ // Compute known bits of the result.
+ KnownZero = ~PossibleSumOne & Known;
+ KnownOne = PossibleSumOne & Known;
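+
+  // Worked example (illustrative, not part of this change): with BitWidth = 4
+  // and Add = true, if LHS is known to be ?100 (top bit unknown) and RHS is
+  // known to be 0001, then PossibleSumZero = ~0011 + ~1110 = 1101 and
+  // PossibleSumOne = 0100 + 0001 = 0101; Known = 0111, giving KnownOne = 0101
+  // and KnownZero = 0010, i.e. the sum is known to end in 101.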
+
// Are we still trying to solve for the sign bit?
- if (!KnownZero.isNegative() && !KnownOne.isNegative()) {
+ if (!Known.isNegative()) {
if (NSW) {
- if (Add) {
- // Adding two positive numbers can't wrap into negative
- if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // and adding two negative numbers can't wrap into positive.
- else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- } else {
- // Subtracting a negative number from a positive one can't wrap
- if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // neither can subtracting a positive number from a negative one.
- else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- }
+ // Adding two non-negative numbers, or subtracting a negative number from
+ // a non-negative one, can't wrap into negative.
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // Adding two negative numbers, or subtracting a non-negative number from
+ // a negative one, can't wrap into non-negative.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
}
}
}
@@ -132,10 +243,11 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth) {
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
unsigned BitWidth = KnownZero.getBitWidth();
- computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1, Q);
bool isKnownNegative = false;
bool isKnownNonNegative = false;
@@ -156,9 +268,9 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
// negative or zero.
if (!isKnownNonNegative)
isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
- isKnownNonZero(Op0, TD, Depth)) ||
+ isKnownNonZero(Op0, TD, Depth, Q)) ||
(isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
- isKnownNonZero(Op1, TD, Depth));
+ isKnownNonZero(Op1, TD, Depth, Q));
}
}
@@ -198,8 +310,10 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
// Use the high end of the ranges to find leading zeros.
unsigned MinLeadingZeros = BitWidth;
for (unsigned i = 0; i < NumRanges; ++i) {
- ConstantInt *Lower = cast<ConstantInt>(Ranges.getOperand(2*i + 0));
- ConstantInt *Upper = cast<ConstantInt>(Ranges.getOperand(2*i + 1));
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
ConstantRange Range(Lower->getValue(), Upper->getValue());
if (Range.isWrappedSet())
MinLeadingZeros = 0; // -1 has no zeros
@@ -210,6 +324,414 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
}
+static bool isEphemeralValueOf(Instruction *I, const Value *E) {
+ SmallVector<const Value *, 16> WorkSet(1, I);
+ SmallPtrSet<const Value *, 32> Visited;
+ SmallPtrSet<const Value *, 16> EphValues;
+
+ while (!WorkSet.empty()) {
+ const Value *V = WorkSet.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ // If all uses of this value are ephemeral, then so is this value.
+ bool FoundNEUse = false;
+ for (const User *I : V->users())
+ if (!EphValues.count(I)) {
+ FoundNEUse = true;
+ break;
+ }
+
+ if (!FoundNEUse) {
+ if (V == E)
+ return true;
+
+ EphValues.insert(V);
+ if (const User *U = dyn_cast<User>(V))
+ for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
+ J != JE; ++J) {
+ if (isSafeToSpeculativelyExecute(*J))
+ WorkSet.push_back(*J);
+ }
+ }
+ }
+
+ return false;
+}
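+
+// For instance (illustrative): in
+//   %cmp = icmp eq i32 %x, 4
+//   call void @llvm.assume(i1 %cmp)
+// %cmp is ephemeral with respect to the assume: its only use feeds the
+// assume, so using the assume to reason about %cmp itself would be circular.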
+
+// Is this an intrinsic that cannot be speculated but also cannot trap?
+static bool isAssumeLikeIntrinsic(const Instruction *I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (Function *F = CI->getCalledFunction())
+ switch (F->getIntrinsicID()) {
+ default: break;
+ // FIXME: This list is repeated from NoTTI::getIntrinsicCost.
+ case Intrinsic::assume:
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ return true;
+ }
+
+ return false;
+}
+
+static bool isValidAssumeForContext(Value *V, const Query &Q,
+ const DataLayout *DL) {
+ Instruction *Inv = cast<Instruction>(V);
+
+ // There are two restrictions on the use of an assume:
+ // 1. The assume must dominate the context (or the control flow must
+ // reach the assume whenever it reaches the context).
+ // 2. The context must not be in the assume's set of ephemeral values
+ // (otherwise we will use the assume to prove that the condition
+ // feeding the assume is trivially true, thus causing the removal of
+ // the assume).
+
+ if (Q.DT) {
+ if (Q.DT->dominates(Inv, Q.CxtI)) {
+ return true;
+ } else if (Inv->getParent() == Q.CxtI->getParent()) {
+ // The context comes first, but they're both in the same block. Make sure
+ // there is nothing in between that might interrupt the control flow.
+ for (BasicBlock::const_iterator I =
+ std::next(BasicBlock::const_iterator(Q.CxtI)),
+ IE(Inv); I != IE; ++I)
+ if (!isSafeToSpeculativelyExecute(I, DL) &&
+ !isAssumeLikeIntrinsic(I))
+ return false;
+
+ return !isEphemeralValueOf(Inv, Q.CxtI);
+ }
+
+ return false;
+ }
+
+ // When we don't have a DT, we do a limited search...
+ if (Inv->getParent() == Q.CxtI->getParent()->getSinglePredecessor()) {
+ return true;
+ } else if (Inv->getParent() == Q.CxtI->getParent()) {
+ // Search forward from the assume until we reach the context (or the end
+ // of the block); the common case is that the assume will come first.
+ for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
+ IE = Inv->getParent()->end(); I != IE; ++I)
+ if (I == Q.CxtI)
+ return true;
+
+ // The context must come first...
+ for (BasicBlock::const_iterator I =
+ std::next(BasicBlock::const_iterator(Q.CxtI)),
+ IE(Inv); I != IE; ++I)
+ if (!isSafeToSpeculativelyExecute(I, DL) &&
+ !isAssumeLikeIntrinsic(I))
+ return false;
+
+ return !isEphemeralValueOf(Inv, Q.CxtI);
+ }
+
+ return false;
+}
+
+bool llvm::isValidAssumeForContext(const Instruction *I,
+ const Instruction *CxtI,
+ const DataLayout *DL,
+ const DominatorTree *DT) {
+ return ::isValidAssumeForContext(const_cast<Instruction*>(I),
+ Query(nullptr, CxtI, DT), DL);
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>,
+ CmpClass_match<RHS, LHS, ICmpInst, ICmpInst::Predicate>>
+m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
+ return m_CombineOr(m_ICmp(Pred, L, R), m_ICmp(Pred, R, L));
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::And>,
+ BinaryOp_match<RHS, LHS, Instruction::And>>
+m_c_And(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_And(L, R), m_And(R, L));
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::Or>,
+ BinaryOp_match<RHS, LHS, Instruction::Or>>
+m_c_Or(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_Or(L, R), m_Or(R, L));
+}
+
+template<typename LHS, typename RHS>
+inline match_combine_or<BinaryOp_match<LHS, RHS, Instruction::Xor>,
+ BinaryOp_match<RHS, LHS, Instruction::Xor>>
+m_c_Xor(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
+}
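+
+// For example (illustrative), m_c_And(m_Specific(V), m_Value(B)) matches both
+// "and i32 %v, %b" and "and i32 %b, %v", binding B to the other operand in
+// either case.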
+
+static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout *DL,
+ unsigned Depth, const Query &Q) {
+ // Use of assumptions is context-sensitive. If we don't have a context, we
+ // cannot use them!
+ if (!Q.AC || !Q.CxtI)
+ return;
+
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ for (auto &AssumeVH : Q.AC->assumptions()) {
+ if (!AssumeVH)
+ continue;
+ CallInst *I = cast<CallInst>(AssumeVH);
+ assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
+ "Got assumption for the wrong function!");
+ if (Q.ExclInvs.count(I))
+ continue;
+
+    // Warning: This loop can end up being somewhat performance sensitive.
+    // We're running this loop once for each value queried, resulting in a
+    // runtime of ~O(#assumes * #values).
+
+    assert(isa<IntrinsicInst>(I) &&
+           cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume &&
+           "must be an assume intrinsic");
+
+ Value *Arg = I->getArgOperand(0);
+
+    if (Arg == V && isValidAssumeForContext(I, Q, DL)) {
+ assert(BitWidth == 1 && "assume operand is not i1?");
+ KnownZero.clearAllBits();
+ KnownOne.setAllBits();
+ return;
+ }
+
+ // The remaining tests are all recursive, so bail out if we hit the limit.
+ if (Depth == MaxDepth)
+ continue;
+
+ Value *A, *B;
+ auto m_V = m_CombineOr(m_Specific(V),
+ m_CombineOr(m_PtrToInt(m_Specific(V)),
+ m_BitCast(m_Specific(V))));
+
+ CmpInst::Predicate Pred;
+ ConstantInt *C;
+ // assume(v = a)
+ if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ KnownZero |= RHSKnownZero;
+ KnownOne |= RHSKnownOne;
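+      // E.g. (illustrative): for "%c = icmp eq i8 %v, 42" followed by
+      // "call void @llvm.assume(i1 %c)", A is the constant 42, all of whose
+      // bits are known, so all of V's bits become known at the context.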
+ // assume(v & b = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
+ computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // known bits from the RHS to V.
+ KnownZero |= RHSKnownZero & MaskKnownOne;
+ KnownOne |= RHSKnownOne & MaskKnownOne;
+ // assume(~(v & b) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
+ computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // inverted known bits from the RHS to V.
+ KnownZero |= RHSKnownOne & MaskKnownOne;
+ KnownOne |= RHSKnownZero & MaskKnownOne;
+ // assume(v | b = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V.
+ KnownZero |= RHSKnownZero & BKnownZero;
+ KnownOne |= RHSKnownOne & BKnownZero;
+ // assume(~(v | b) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V.
+ KnownZero |= RHSKnownOne & BKnownZero;
+ KnownOne |= RHSKnownZero & BKnownZero;
+ // assume(v ^ b = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V. For those bits in B that are known to be one,
+ // we can propagate inverted known bits from the RHS to V.
+ KnownZero |= RHSKnownZero & BKnownZero;
+ KnownOne |= RHSKnownOne & BKnownZero;
+ KnownZero |= RHSKnownOne & BKnownOne;
+ KnownOne |= RHSKnownZero & BKnownOne;
+ // assume(~(v ^ b) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
+ computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I));
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V. For those bits in B that are
+ // known to be one, we can propagate known bits from the RHS to V.
+ KnownZero |= RHSKnownOne & BKnownZero;
+ KnownOne |= RHSKnownZero & BKnownZero;
+ KnownZero |= RHSKnownZero & BKnownOne;
+ KnownOne |= RHSKnownOne & BKnownOne;
+ // assume(v << c = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ KnownZero |= RHSKnownZero.lshr(C->getZExtValue());
+ KnownOne |= RHSKnownOne.lshr(C->getZExtValue());
+ // assume(~(v << c) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ KnownZero |= RHSKnownOne.lshr(C->getZExtValue());
+ KnownOne |= RHSKnownZero.lshr(C->getZExtValue());
+ // assume(v >> c = a)
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
+ m_AShr(m_V,
+ m_ConstantInt(C))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the left by C.
+ KnownZero |= RHSKnownZero << C->getZExtValue();
+ KnownOne |= RHSKnownOne << C->getZExtValue();
+ // assume(~(v >> c) = a)
+ } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
+ m_LShr(m_V, m_ConstantInt(C)),
+ m_AShr(m_V, m_ConstantInt(C)))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the left by C.
+ KnownZero |= RHSKnownOne << C->getZExtValue();
+ KnownOne |= RHSKnownZero << C->getZExtValue();
+ // assume(v >=_s c) where c is non-negative
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_SGE &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownZero.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v >_s c) where c is at least -1.
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_SGT &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v <=_s c) where c is negative
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_SLE &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownOne.isNegative()) {
+ // We know that the sign bit is one.
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v <_s c) where c is non-positive
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_SLT &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) {
+ // We know that the sign bit is one.
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ // assume(v <=_u c)
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_ULE &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ // Whatever high bits in c are zero are known to be zero.
+ KnownZero |=
+ APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
+ // assume(v <_u c)
+ } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
+ Pred == ICmpInst::ICMP_ULT &&
+ isValidAssumeForContext(I, Q, DL)) {
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
+
+ // Whatever high bits in c are zero are known to be zero (if c is a power
+ // of 2, then one more).
+ if (isKnownToBeAPowerOfTwo(A, false, Depth+1, Query(Q, I)))
+ KnownZero |=
+ APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1);
+ else
+ KnownZero |=
+ APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
+ }
+ }
+}
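
To make the first propagation rule above concrete: given assume((v & b) == a), every position where the mask b is known to be one pins v's bit to a's bit. A minimal standalone sketch of that rule (hypothetical KnownBits64/propagateAndEq names, plain 64-bit masks instead of APInt):

#include <cassert>
#include <cstdint>

struct KnownBits64 {
  uint64_t Zero = 0; // bits known to be 0
  uint64_t One  = 0; // bits known to be 1
};

// Apply the fact (V & B) == A: wherever B is known one, V's bit must
// equal A's bit, so A's known bits transfer to V at those positions.
static KnownBits64 propagateAndEq(const KnownBits64 &A, const KnownBits64 &B) {
  KnownBits64 V;
  V.Zero = A.Zero & B.One;
  V.One  = A.One  & B.One;
  return V;
}

int main() {
  KnownBits64 A, B;
  A.One  = 0xF0; // a's bits 4-7 are known one
  A.Zero = 0x0F; // a's bits 0-3 are known zero
  B.One  = 0xFF; // the mask is known to be 0xFF
  KnownBits64 V = propagateAndEq(A, B);
  assert(V.One == 0xF0 && V.Zero == 0x0F);
}
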
+
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
///
@@ -225,8 +747,9 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth) {
+void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = KnownZero.getBitWidth();
@@ -272,10 +795,10 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
// The address of an aligned GlobalValue has trailing zeros.
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- unsigned Align = GV->getAlignment();
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
+ unsigned Align = GO->getAlignment();
if (Align == 0 && TD) {
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
Type *ObjectType = GVar->getType()->getElementType();
if (ObjectType->isSized()) {
// If the object is defined in the current Module, we'll be giving
@@ -296,25 +819,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownOne.clearAllBits();
return;
}
- // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
- // the bits of its aliasee.
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden()) {
- KnownZero.clearAllBits(); KnownOne.clearAllBits();
- } else {
- computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
- }
- return;
- }
if (Argument *A = dyn_cast<Argument>(V)) {
- unsigned Align = 0;
+ unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
- if (A->hasByValOrInAllocaAttr()) {
- // Get alignment information off byval/inalloca arguments if specified in
- // the IR.
- Align = A->getParamAlignment();
- } else if (TD && A->hasStructRetAttr()) {
+ if (!Align && TD && A->hasStructRetAttr()) {
// An sret parameter has at least the ABI alignment of the return type.
Type *EltTy = cast<PointerType>(A->getType())->getElementType();
if (EltTy->isSized())
@@ -323,14 +832,34 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (Align)
KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ else
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+
+ // Don't give up yet... there might be an assumption that provides more
+ // information...
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
return;
}
// Start out not knowing anything.
KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ // Limit search depth.
+ // All recursive calls that increase depth must come after this.
if (Depth == MaxDepth)
- return; // Limit search depth.
+ return;
+
+ // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+ // the bits of its aliasee.
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden())
+ computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth + 1, Q);
+ return;
+ }
+
+ // Check whether a nearby assume intrinsic can determine some known bits.
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
Operator *I = dyn_cast<Operator>(V);
if (!I) return;
@@ -344,8 +873,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
@@ -354,8 +883,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Or: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
@@ -364,8 +893,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Xor: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
@@ -377,19 +906,20 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Mul: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+ Depth, Q);
break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
@@ -399,9 +929,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Select:
- computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
- Depth+1);
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
@@ -437,7 +966,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
assert(SrcBitWidth && "SrcBitWidth can't be zero");
KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero = KnownZero.zextOrTrunc(BitWidth);
KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
@@ -451,7 +980,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
break;
}
break;
@@ -462,7 +991,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -478,11 +1007,10 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
- break;
}
break;
case Instruction::LShr:
@@ -492,12 +1020,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
// Unsigned shift right.
- computeKnownBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
// high bits known zero.
KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- break;
}
break;
case Instruction::AShr:
@@ -507,7 +1034,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -516,21 +1043,20 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero |= HighBits;
else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
KnownOne |= HighBits;
- break;
}
break;
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth);
+ Depth, Q);
break;
}
case Instruction::Add: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth);
+ Depth, Q);
break;
}
case Instruction::SRem:
@@ -538,7 +1064,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD,
+ Depth+1, Q);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -563,7 +1090,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
- Depth+1);
+ Depth+1, Q);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setBit(BitWidth - 1);
@@ -576,7 +1103,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD,
- Depth+1);
+ Depth+1, Q);
KnownZero |= ~LowBits;
KnownOne &= LowBits;
break;
@@ -585,8 +1112,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
@@ -610,7 +1137,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// to determine if we can prove known low zero bits.
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
- Depth+1);
+ Depth+1, Q);
unsigned TrailZ = LocalKnownZero.countTrailingOnes();
gep_type_iterator GTI = gep_type_begin(I);
@@ -646,7 +1173,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1, Q);
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
LocalKnownZero.countTrailingOnes()));
@@ -688,11 +1215,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1, Q);
// We need to take the minimum number of known bits
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1);
+ computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1, Q);
KnownZero = APInt::getLowBitsSet(BitWidth,
std::min(KnownZero2.countTrailingOnes(),
@@ -724,7 +1251,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
- MaxDepth-1);
+ MaxDepth-1, Q);
KnownZero &= KnownZero2;
KnownOne &= KnownOne2;
// If all bits have been ruled out, there's no need to check
@@ -776,19 +1303,19 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Intrinsic::sadd_with_overflow:
computeKnownBitsAddSub(true, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
computeKnownBitsAddSub(false, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1),
false, KnownZero, KnownOne,
- KnownZero2, KnownOne2, TD, Depth);
+ KnownZero2, KnownOne2, TD, Depth, Q);
break;
}
}
@@ -798,10 +1325,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
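
The And/Or/Xor cases above all reduce to mask algebra on the two operands' known bits. A compact sketch of those combination rules (hypothetical KB struct, plain 64-bit masks rather than APInt):

#include <cassert>
#include <cstdint>

struct KB { uint64_t Zero, One; }; // known-zero / known-one masks

// And: result is zero where either side is zero, one where both are one.
static KB knownAnd(KB A, KB B) { return {A.Zero | B.Zero, A.One & B.One}; }
// Or: result is zero where both are zero, one where either is one.
static KB knownOr (KB A, KB B) { return {A.Zero & B.Zero, A.One | B.One}; }
// Xor: bits known equal give zero, bits known to differ give one.
static KB knownXor(KB A, KB B) {
  uint64_t Z = (A.Zero & B.Zero) | (A.One & B.One);
  uint64_t O = (A.Zero & B.One) | (A.One & B.Zero);
  return {Z, O};
}

int main() {
  KB A{0x0F, 0xF0}, B{0xFF, 0x00}; // A is exactly 0xF0; B is exactly 0x00
  assert(knownAnd(A, B).Zero == 0xFF);
  assert(knownOr (A, B).One  == 0xF0);
  assert(knownXor(A, B).One  == 0xF0 && knownXor(A, B).Zero == 0x0F);
}
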
-/// ComputeSignBit - Determine whether the sign bit is known to be zero or
-/// one. Convenience wrapper around computeKnownBits.
-void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth) {
+/// Determine whether the sign bit is known to be zero or one.
+/// Convenience wrapper around computeKnownBits.
+void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
unsigned BitWidth = getBitWidth(V->getType(), TD);
if (!BitWidth) {
KnownZero = false;
@@ -810,16 +1338,17 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
}
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
- computeKnownBits(V, ZeroBits, OneBits, TD, Depth);
+ computeKnownBits(V, ZeroBits, OneBits, TD, Depth, Q);
KnownOne = OneBits[BitWidth - 1];
KnownZero = ZeroBits[BitWidth - 1];
}
-/// isKnownToBeAPowerOfTwo - Return true if the given value is known to have exactly one
+/// Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
-/// be a power of two when defined. Supports values with integer or pointer
+/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
+bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+ const Query &Q) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -846,19 +1375,20 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
// A shift of a power of two is a power of two or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
match(V, m_Shr(m_Value(X), m_Value()))))
- return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth);
+ return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q);
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
- return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth);
+ return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
if (SelectInst *SI = dyn_cast<SelectInst>(V))
- return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) &&
- isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth);
+ return
+ isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
+ isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
// A power of two and'd with anything is a power of two or zero.
- if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) ||
- isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth))
+ if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q) ||
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth, Q))
return true;
// X & (-X) is always a power of two or zero.
if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
@@ -873,19 +1403,19 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
- if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth))
+ if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
return true;
if (match(Y, m_And(m_Specific(X), m_Value())) ||
match(Y, m_And(m_Value(), m_Specific(X))))
- if (isKnownToBeAPowerOfTwo(X, OrZero, Depth))
+ if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
return true;
unsigned BitWidth = V->getType()->getScalarSizeInBits();
APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
- computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth);
+ computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth, Q);
APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
- computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth);
+ computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth, Q);
// If i8 V is a power of two or zero:
// ZeroBits: 1 1 1 0 1 1 1 1
// ~ZeroBits: 0 0 0 1 0 0 0 0
@@ -902,7 +1432,8 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
// copying a sign bit (sdiv int_min, 2).
if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
- return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero, Depth);
+ return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
+ Depth, Q);
}
return false;
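
The X & (-X) special case above relies on negation isolating the lowest set bit. A quick standalone check of that identity (a sketch over uint32_t, not the LLVM API):

#include <cassert>
#include <cstdint>

// v & (v - 1) clears the lowest set bit, so it is zero exactly when
// v has at most one bit set, i.e. v is a power of two or zero.
static bool isPowerOfTwoOrZero(uint32_t v) { return (v & (v - 1)) == 0; }

int main() {
  const uint32_t Vals[] = {0, 1, 6, 40, 0x80000000u, 0xDEADBEEFu};
  for (uint32_t X : Vals)
    assert(isPowerOfTwoOrZero(X & (0u - X))); // X & -X isolates the low bit
}
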
@@ -915,7 +1446,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
///
/// Currently this routine does not support vector GEPs.
static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
- unsigned Depth) {
+ unsigned Depth, const Query &Q) {
if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
return false;
@@ -924,7 +1455,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
// If the base pointer is non-null, we cannot walk to a null address with an
// inbounds GEP in address space zero.
- if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth))
+ if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q))
return true;
// Past this, if we don't have DataLayout, we can't do much.
@@ -967,18 +1498,38 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
if (Depth++ >= MaxDepth)
continue;
- if (isKnownNonZero(GTI.getOperand(), DL, Depth))
+ if (isKnownNonZero(GTI.getOperand(), DL, Depth, Q))
return true;
}
return false;
}
-/// isKnownNonZero - Return true if the given value is known to be non-zero
-/// when defined. For vectors return true if every element is known to be
-/// non-zero when defined. Supports values with integer or pointer type and
-/// vectors of integers.
-bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
+/// Does the 'Range' metadata (which must be a valid MD_range operand list)
+/// ensure that the value it's attached to is never equal to Value?
+/// 'RangeType' is the type of the value described by the range.
+static bool rangeMetadataExcludesValue(MDNode* Ranges,
+ const APInt& Value) {
+ const unsigned NumRanges = Ranges->getNumOperands() / 2;
+ assert(NumRanges >= 1);
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower =
+ mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
+ ConstantInt *Upper =
+ mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+ if (Range.contains(Value))
+ return false;
+ }
+ return true;
+}
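
Each operand pair of !range metadata denotes a half-open ConstantRange, which may wrap around the top of the type. A simplified sketch of the same exclusion test (hypothetical rangeContains/rangesExcludeValue helpers, plain integers instead of APInt):

#include <cassert>
#include <cstdint>

// Half-open range [Lo, Hi), wrapping around 2^64 when Hi <= Lo, which
// mirrors how each !range operand pair is interpreted as a ConstantRange.
static bool rangeContains(uint64_t Lo, uint64_t Hi, uint64_t V) {
  if (Lo < Hi)
    return Lo <= V && V < Hi; // ordinary range
  return V >= Lo || V < Hi;   // wrapped range
}

// A value is excluded only if no attached range can contain it.
static bool rangesExcludeValue(const uint64_t (*Pairs)[2], unsigned N,
                               uint64_t V) {
  for (unsigned i = 0; i != N; ++i)
    if (rangeContains(Pairs[i][0], Pairs[i][1], V))
      return false;
  return true;
}

int main() {
  const uint64_t Pairs[][2] = {{1, 10}, {20, 30}};
  assert(rangesExcludeValue(Pairs, 2, 0));  // zero lies in no range
  assert(!rangesExcludeValue(Pairs, 2, 5)); // 5 falls in [1, 10)
}
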
+
+/// Return true if the given value is known to be non-zero when defined.
+/// For vectors return true if every element is known to be non-zero when
+/// defined. Supports values with integer or pointer type and vectors of
+/// integers.
+bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return false;
@@ -989,6 +1540,18 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
return false;
}
+ if (Instruction* I = dyn_cast<Instruction>(V)) {
+ if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) {
+ // If the possible ranges don't contain zero, then the value is
+ // definitely non-zero.
+ if (IntegerType* Ty = dyn_cast<IntegerType>(V->getType())) {
+ const APInt ZeroValue(Ty->getBitWidth(), 0);
+ if (rangeMetadataExcludesValue(Ranges, ZeroValue))
+ return true;
+ }
+ }
+ }
+
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth++ >= MaxDepth)
return false;
@@ -998,7 +1561,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
if (isKnownNonNull(V))
return true;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- if (isGEPKnownNonNull(GEP, TD, Depth))
+ if (isGEPKnownNonNull(GEP, TD, Depth, Q))
return true;
}
@@ -1007,11 +1570,12 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// X | Y != 0 if X != 0 or Y != 0.
Value *X = nullptr, *Y = nullptr;
if (match(V, m_Or(m_Value(X), m_Value(Y))))
- return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+ return isKnownNonZero(X, TD, Depth, Q) ||
+ isKnownNonZero(Y, TD, Depth, Q);
// ext X != 0 if X != 0.
if (isa<SExtInst>(V) || isa<ZExtInst>(V))
- return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth, Q);
// shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
// if the lowest bit is shifted off the end.
@@ -1019,11 +1583,11 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// shl nuw can't remove any non-zero bits.
OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
- return isKnownNonZero(X, TD, Depth);
+ return isKnownNonZero(X, TD, Depth, Q);
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
if (KnownOne[0])
return true;
}
@@ -1033,28 +1597,29 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// shr exact can only shift out zero bits.
PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
- return isKnownNonZero(X, TD, Depth);
+ return isKnownNonZero(X, TD, Depth, Q);
bool XKnownNonNegative, XKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
if (XKnownNegative)
return true;
}
// div exact can only produce a zero if the dividend is zero.
else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
- return isKnownNonZero(X, TD, Depth);
+ return isKnownNonZero(X, TD, Depth, Q);
}
// X + Y.
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
bool XKnownNonNegative, XKnownNegative;
bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth, Q);
// If X and Y are both non-negative (as signed values) then their sum is not
// zero unless both X and Y are zero.
if (XKnownNonNegative && YKnownNonNegative)
- if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+ if (isKnownNonZero(X, TD, Depth, Q) ||
+ isKnownNonZero(Y, TD, Depth, Q))
return true;
// If X and Y are both negative (as signed values) then their sum is not
@@ -1065,20 +1630,22 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- computeKnownBits(Y, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(Y, KnownZero, KnownOne, TD, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
}
// The sum of a non-negative number and a power of two is not zero.
- if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth))
+ if (XKnownNonNegative &&
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth, Q))
return true;
- if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth))
+ if (YKnownNonNegative &&
+ isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth, Q))
return true;
}
// X * Y.
@@ -1087,51 +1654,53 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
- isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth))
+ isKnownNonZero(X, TD, Depth, Q) &&
+ isKnownNonZero(Y, TD, Depth, Q))
return true;
}
// (C ? X : Y) != 0 if X != 0 and Y != 0.
else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
- if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
- isKnownNonZero(SI->getFalseValue(), TD, Depth))
+ if (isKnownNonZero(SI->getTrueValue(), TD, Depth, Q) &&
+ isKnownNonZero(SI->getFalseValue(), TD, Depth, Q))
return true;
}
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
return KnownOne != 0;
}
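
One of the Add rules above, that a non-negative value plus a nonzero power of two can never be zero, is easy to validate exhaustively at a small bit width. A brute-force sketch over 8-bit values:

#include <cassert>
#include <cstdint>

// Exhaustive 8-bit check: a value with the sign bit clear plus a
// nonzero power of two never sums to zero (mod 2^8).
int main() {
  for (unsigned X = 0; X < 128; ++X)   // sign bit clear
    for (unsigned K = 0; K < 8; ++K) { // Y = 2^K
      uint8_t Sum = (uint8_t)(X + (1u << K));
      assert(Sum != 0);
    }
}
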
-/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
-/// this predicate to simplify operations downstream. Mask is known to be zero
-/// for bits that V cannot have.
+/// Return true if 'V & Mask' is known to be zero. We use this predicate to
+/// simplify operations downstream. Mask is known to be zero for bits that V
+/// cannot have.
///
/// This function is defined on values with integer type, values with pointer
/// type (but only if TD is non-null), and vectors of integers. In the case
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
- const DataLayout *TD, unsigned Depth) {
+bool MaskedValueIsZero(Value *V, const APInt &Mask,
+ const DataLayout *TD, unsigned Depth,
+ const Query &Q) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
return (KnownZero & Mask) == Mask;
}
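
MaskedValueIsZero therefore reduces to a single mask test against the computed known-zero bits. A sketch of that final comparison (plain 64-bit masks; maskedValueIsZero is a hypothetical stand-in):

#include <cassert>
#include <cstdint>

// 'V & Mask is known zero' holds iff every bit of Mask is a known-zero
// bit of V, which is the single comparison the function above performs.
static bool maskedValueIsZero(uint64_t KnownZero, uint64_t Mask) {
  return (KnownZero & Mask) == Mask;
}

int main() {
  uint64_t KnownZero = 0xF;                    // e.g. V = X << 4
  assert(maskedValueIsZero(KnownZero, 0x3));   // so V & 0x3 == 0
  assert(!maskedValueIsZero(KnownZero, 0x10)); // bit 4 is unknown
}
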
-/// ComputeNumSignBits - Return the number of times the sign bit of the
-/// register is replicated into the other bits. We know that at least 1 bit
-/// is always equal to the sign bit (itself), but other cases can give us
-/// information. For example, immediately after an "ashr X, 2", we know that
-/// the top 3 bits are all equal to each other, so we return 3.
+/// Return the number of times the sign bit of the register is replicated into
+/// the other bits. We know that at least 1 bit is always equal to the sign bit
+/// (itself), but other cases can give us information. For example, immediately
+/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
+/// other, so we return 3.
///
/// 'Op' must have a scalar integer type.
///
-unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
- unsigned Depth) {
+unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
+ unsigned Depth, const Query &Q) {
assert((TD || V->getType()->isIntOrIntVectorTy()) &&
"ComputeNumSignBits requires a DataLayout object to operate "
"on non-integer values!");
@@ -1152,10 +1721,10 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
default: break;
case Instruction::SExt:
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
- return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
+ return ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q) + Tmp;
case Instruction::AShr: {
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
// ashr X, C -> adds C sign bits. Vectors too.
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
@@ -1168,7 +1737,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
// shl destroys sign bits.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
Tmp2 = ShAmt->getZExtValue();
if (Tmp2 >= TyBits || // Bad shift.
Tmp2 >= Tmp) break; // Shifted all sign bits out.
@@ -1180,9 +1749,9 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
case Instruction::Or:
case Instruction::Xor: // NOT is handled here.
// Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
@@ -1191,22 +1760,22 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
break;
case Instruction::Select:
- Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1, Q);
return std::min(Tmp, Tmp2);
case Instruction::Add:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
- if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
+ if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -1219,19 +1788,19 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
return Tmp;
}
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
if (Tmp2 == 1) return 1;
return std::min(Tmp, Tmp2)-1;
case Instruction::Sub:
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
if (Tmp2 == 1) return 1;
// Handle NEG.
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
+ if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
@@ -1247,22 +1816,26 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+ Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(U);
+ unsigned NumIncomingValues = PN->getNumIncomingValues();
// Don't analyze large in-degree PHIs.
- if (PN->getNumIncomingValues() > 4) break;
+ if (NumIncomingValues > 4) break;
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (NumIncomingValues == 0) break;
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
- for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q);
+ for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
if (Tmp == 1) return Tmp;
Tmp = std::min(Tmp,
- ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+ ComputeNumSignBits(PN->getIncomingValue(i), TD,
+ Depth+1, Q));
}
return Tmp;
}
@@ -1277,7 +1850,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
// use this information.
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
APInt Mask;
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
@@ -1297,9 +1870,9 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
}
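
The "ashr X, 2 leaves the top 3 bits equal" example from the doc comment above can be reproduced directly. A small sketch counting replicated sign bits of an 8-bit value (numSignBits is a hypothetical stand-in, not the LLVM routine):

#include <cassert>
#include <cstdint>

// Count how many top bits of an 8-bit value equal the sign bit,
// the quantity ComputeNumSignBits returns (always at least 1).
static unsigned numSignBits(int8_t V) {
  unsigned N = 1;
  for (int B = 6; B >= 0 && ((V >> B) & 1) == ((V >> 7) & 1); --B)
    ++N;
  return N;
}

int main() {
  int8_t X = -100;                  // 0b10011100: only 1 sign bit
  assert(numSignBits(X) == 1);
  assert(numSignBits(X >> 2) >= 3); // ashr by 2 adds two sign bits
}
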
-/// ComputeMultiple - This function computes the integer multiple of Base that
-/// equals V. If successful, it returns true and returns the multiple in
-/// Multiple. If unsuccessful, it returns false. It looks
+/// This function computes the integer multiple of Base that equals V.
+/// If successful, it returns true and returns the multiple in
+/// Multiple. If unsuccessful, it returns false. It looks
/// through SExt instructions only if LookThroughSExt is true.
bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
bool LookThroughSExt, unsigned Depth) {
@@ -1417,8 +1990,8 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
return false;
}
-/// CannotBeNegativeZero - Return true if we can prove that the specified FP
-/// value is never equal to -0.0.
+/// Return true if we can prove that the specified FP value is never equal to
+/// -0.0.
///
/// NOTE: this function will need to be revisited when we support non-default
/// rounding modes!
@@ -1471,8 +2044,8 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return false;
}
-/// isBytewiseValue - If the specified value can be set by repeating the same
-/// byte in memory, return the i8 value that it is represented with. This is
+/// If the specified value can be set by repeating the same byte in memory,
+/// return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
/// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
/// byte store (e.g. i16 0x1234), return null.
@@ -1620,7 +2193,7 @@ static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
}
-/// FindInsertedValue - Given an aggregrate and an sequence of indices, see if
+/// Given an aggregate and a sequence of indices, see if
/// the scalar value indexed is already around as a register, for example if it
/// were inserted directly into the aggregate.
///
@@ -1710,9 +2283,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
return nullptr;
}
-/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
-/// it can be expressed as a base pointer plus a constant offset. Return the
-/// base and offset to the caller.
+/// Analyze the specified pointer to see if it can be expressed as a base
+/// pointer plus a constant offset. Return the base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const DataLayout *DL) {
// Without DataLayout, conservatively assume 64-bit offsets, which is
@@ -1749,9 +2321,9 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
}
-/// getConstantStringInfo - This function computes the length of a
-/// null-terminated C string pointed to by V. If successful, it returns true
-/// and returns the string in Str. If unsuccessful, it returns false.
+/// This function computes the length of a null-terminated C string pointed to
+/// by V. If successful, it returns true and returns the string in Str.
+/// If unsuccessful, it returns false.
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
uint64_t Offset, bool TrimAtNul) {
assert(V);
@@ -1835,16 +2407,16 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
// nodes.
// TODO: See if we can integrate these two together.
-/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
// Look through noop bitcast instructions.
V = V->stripPointerCasts();
// If this is a PHI node, there are two cases: either we have already seen it
// or we haven't.
if (PHINode *PN = dyn_cast<PHINode>(V)) {
- if (!PHIs.insert(PN))
+ if (!PHIs.insert(PN).second)
return ~0ULL; // already in the set.
// If it was new, see if all the input strings are the same length.
@@ -1884,7 +2456,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
return StrData.size()+1;
}
-/// GetStringLength - If we can compute the length of the string pointed to by
+/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
uint64_t llvm::GetStringLength(Value *V) {
if (!V->getType()->isPointerTy()) return 0;
@@ -1913,7 +2485,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
} else {
// See if InstructionSimplify knows any relevant tricks.
if (Instruction *I = dyn_cast<Instruction>(V))
- // TODO: Acquire a DominatorTree and use it.
+ // TODO: Acquire a DominatorTree and AssumptionCache and use them.
if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) {
V = Simplified;
continue;
@@ -1938,7 +2510,7 @@ llvm::GetUnderlyingObjects(Value *V,
Value *P = Worklist.pop_back_val();
P = GetUnderlyingObject(P, TD, MaxLookup);
- if (!Visited.insert(P))
+ if (!Visited.insert(P).second)
continue;
if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
@@ -1957,9 +2529,7 @@ llvm::GetUnderlyingObjects(Value *V,
} while (!Worklist.empty());
}
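
Several hunks in this diff switch from testing insert(P) directly to insert(P).second, following the SmallPtrSet API change that made insert return an (iterator, bool) pair. The same idiom with std::set:

#include <cassert>
#include <set>

// insert() returns an (iterator, bool) pair; .second is true only when
// the element was not already present, i.e. on the first visit.
int main() {
  std::set<int> Visited;
  assert(Visited.insert(42).second);  // newly inserted
  assert(!Visited.insert(42).second); // already visited
}
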
-/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
-/// are lifetime markers.
-///
+/// Return true if the only users of this pointer are lifetime markers.
bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
for (const User *U : V->users()) {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
@@ -2022,41 +2592,44 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return LI->getPointerOperand()->isDereferenceablePointer(TD);
}
case Instruction::Call: {
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- // These synthetic intrinsics have no side-effects and just mark
- // information about their operands.
- // FIXME: There are other no-op synthetic instructions that potentially
- // should be considered at least *safe* to speculate...
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- return true;
-
- case Intrinsic::bswap:
- case Intrinsic::ctlz:
- case Intrinsic::ctpop:
- case Intrinsic::cttz:
- case Intrinsic::objectsize:
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::umul_with_overflow:
- case Intrinsic::usub_with_overflow:
- return true;
- // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set
- // errno like libm sqrt would.
- case Intrinsic::sqrt:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- return true;
- // TODO: some fp intrinsics are marked as having the same error handling
- // as libm. They're safe to speculate when they won't error.
- // TODO: are convert_{from,to}_fp16 safe?
- // TODO: can we list target-specific intrinsics here?
- default: break;
- }
- }
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ // These synthetic intrinsics have no side-effects and just mark
+ // information about their operands.
+ // FIXME: There are other no-op synthetic instructions that potentially
+ // should be considered at least *safe* to speculate...
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ return true;
+
+ case Intrinsic::bswap:
+ case Intrinsic::ctlz:
+ case Intrinsic::ctpop:
+ case Intrinsic::cttz:
+ case Intrinsic::objectsize:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ return true;
+ // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set
+ // errno like libm sqrt would.
+ case Intrinsic::sqrt:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ case Intrinsic::fabs:
+ case Intrinsic::minnum:
+ case Intrinsic::maxnum:
+ return true;
+ // TODO: some fp intrinsics are marked as having the same error handling
+ // as libm. They're safe to speculate when they won't error.
+ // TODO: are convert_{from,to}_fp16 safe?
+ // TODO: can we list target-specific intrinsics here?
+ default: break;
+ }
+ }
return false; // The called function could have undefined behavior or
// side-effects, even if marked readnone nounwind.
}
@@ -2079,8 +2652,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
}
}
-/// isKnownNonNull - Return true if we know that the specified value is never
-/// null.
+/// Return true if we know that the specified value is never null.
bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V)) return true;
@@ -2093,6 +2665,10 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return !GV->hasExternalWeakLinkage();
+  // A load tagged with nonnull metadata is never null.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V))
+ return LI->getMetadata(LLVMContext::MD_nonnull);
+
if (ImmutableCallSite CS = V)
if (CS.isReturnNonNull())
return true;
@@ -2103,3 +2679,82 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
return false;
}
+
+OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
+ const DataLayout *DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ // Multiplying n * m significant bits yields a result of n + m significant
+ // bits. If the total number of significant bits does not exceed the
+ // result bit width (minus 1), there is no overflow.
+ // This means if we have enough leading zero bits in the operands
+ // we can guarantee that the result does not overflow.
+ // Ref: "Hacker's Delight" by Henry Warren
+ unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
+ DT);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
+ DT);
+ // Note that underestimating the number of zero bits gives a more
+ // conservative answer.
+ unsigned ZeroBits = LHSKnownZero.countLeadingOnes() +
+ RHSKnownZero.countLeadingOnes();
+ // First handle the easy case: if we have enough zero bits there's
+ // definitely no overflow.
+ if (ZeroBits >= BitWidth)
+ return OverflowResult::NeverOverflows;
+
+ // Get the largest possible values for each operand.
+ APInt LHSMax = ~LHSKnownZero;
+ APInt RHSMax = ~RHSKnownZero;
+
+ // We know the multiply operation doesn't overflow if the maximum values for
+ // each operand will not overflow after we multiply them together.
+ bool MaxOverflow;
+ LHSMax.umul_ov(RHSMax, MaxOverflow);
+ if (!MaxOverflow)
+ return OverflowResult::NeverOverflows;
+
+ // We know it always overflows if multiplying the smallest possible values for
+ // the operands also results in overflow.
+ bool MinOverflow;
+ LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
+ if (MinOverflow)
+ return OverflowResult::AlwaysOverflows;
+
+ return OverflowResult::MayOverflow;
+}
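
The leading-zeros shortcut above can be checked on concrete values: if clz(LHS) + clz(RHS) >= BitWidth, even the two operand maxima multiply without wrapping. A sketch at 32 bits (uses GCC/Clang's __builtin_clz):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x0000FFFF, B = 0x0000FFFF; // 16 leading zeros each
  assert(__builtin_clz(A) + __builtin_clz(B) >= 32);
  uint64_t Wide = (uint64_t)A * B;         // widen to observe wrapping
  assert(Wide <= UINT32_MAX);              // the 32-bit multiply is safe
}
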
+
+OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
+ const DataLayout *DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+ if (LHSKnownNonNegative || LHSKnownNegative) {
+ bool RHSKnownNonNegative, RHSKnownNegative;
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0,
+ AC, CxtI, DT);
+
+ if (LHSKnownNegative && RHSKnownNegative) {
+ // The sign bit is set in both cases: this MUST overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ return OverflowResult::AlwaysOverflows;
+ }
+
+ if (LHSKnownNonNegative && RHSKnownNonNegative) {
+ // The sign bit is clear in both cases: this CANNOT overflow.
+ // Create a simple add instruction, and insert it into the struct.
+ return OverflowResult::NeverOverflows;
+ }
+ }
+
+ return OverflowResult::MayOverflow;
+}
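
The two sign-bit conclusions above are exhaustively checkable at a small width: two operands with the top bit set always wrap an unsigned add, and two with it clear never do. A brute-force sketch over 8-bit values:

#include <cassert>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      bool Wraps = A + B > 255; // 8-bit unsigned add overflow
      if ((A & 0x80) && (B & 0x80))
        assert(Wraps);          // both sign bits set: AlwaysOverflows
      if (!(A & 0x80) && !(B & 0x80))
        assert(!Wraps);         // both clear: NeverOverflows
    }
}
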