author    Dimitry Andric <dim@FreeBSD.org>    2017-01-02 19:17:04 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2017-01-02 19:17:04 +0000
commit    b915e9e0fc85ba6f398b3fab0db6a81a8913af94
tree      98b8f811c7aff2547cab8642daf372d6c59502fb
parent    6421cca32f69ac849537a3cff78c352195e99f1b
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 29
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 2
-rw-r--r--  lib/Analysis/AliasAnalysisSummary.cpp | 34
-rw-r--r--  lib/Analysis/AliasAnalysisSummary.h | 56
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 351
-rw-r--r--  lib/Analysis/Analysis.cpp | 10
-rw-r--r--  lib/Analysis/AssumptionCache.cpp | 4
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 143
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp | 32
-rw-r--r--  lib/Analysis/BlockFrequencyInfoImpl.cpp | 17
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 40
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 111
-rw-r--r--  lib/Analysis/CFLAndersAliasAnalysis.cpp | 484
-rw-r--r--  lib/Analysis/CFLGraph.h | 128
-rw-r--r--  lib/Analysis/CFLSteensAliasAnalysis.cpp | 78
-rw-r--r--  lib/Analysis/CGSCCPassManager.cpp | 501
-rw-r--r--  lib/Analysis/CMakeLists.txt | 6
-rw-r--r--  lib/Analysis/CallGraph.cpp | 4
-rw-r--r--  lib/Analysis/CallGraphSCCPass.cpp | 12
-rw-r--r--  lib/Analysis/CodeMetrics.cpp | 66
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 478
-rw-r--r--  lib/Analysis/CostModel.cpp | 47
-rw-r--r--  lib/Analysis/DemandedBits.cpp | 13
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/DominanceFrontier.cpp | 2
-rw-r--r--  lib/Analysis/EHPersonalities.cpp | 25
-rw-r--r--  lib/Analysis/GlobalsModRef.cpp | 24
-rw-r--r--  lib/Analysis/IVUsers.cpp | 6
-rw-r--r--  lib/Analysis/InlineCost.cpp | 266
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 1570
-rw-r--r--  lib/Analysis/IteratedDominanceFrontier.cpp | 2
-rw-r--r--  lib/Analysis/LLVMBuild.txt | 2
-rw-r--r--  lib/Analysis/LazyBlockFrequencyInfo.cpp | 15
-rw-r--r--  lib/Analysis/LazyBranchProbabilityInfo.cpp | 63
-rw-r--r--  lib/Analysis/LazyCallGraph.cpp | 1007
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 990
-rw-r--r--  lib/Analysis/Lint.cpp | 37
-rw-r--r--  lib/Analysis/Loads.cpp | 24
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp | 188
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 124
-rw-r--r--  lib/Analysis/LoopPass.cpp | 28
-rw-r--r--  lib/Analysis/LoopPassManager.cpp | 22
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 129
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 172
-rw-r--r--  lib/Analysis/ModuleDebugInfoPrinter.cpp | 13
-rw-r--r--  lib/Analysis/ModuleSummaryAnalysis.cpp | 345
-rw-r--r--  lib/Analysis/ObjCARCAliasAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/ObjCARCInstKind.cpp | 71
-rw-r--r--  lib/Analysis/OptimizationDiagnosticInfo.cpp | 189
-rw-r--r--  lib/Analysis/PHITransAddr.cpp | 14
-rw-r--r--  lib/Analysis/PostDominators.cpp | 2
-rw-r--r--  lib/Analysis/ProfileSummaryInfo.cpp | 72
-rw-r--r--  lib/Analysis/RegionInfo.cpp | 9
-rw-r--r--  lib/Analysis/RegionPass.cpp | 6
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 1484
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 4
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 96
-rw-r--r--  lib/Analysis/ScopedNoAliasAA.cpp | 17
-rw-r--r--  lib/Analysis/StratifiedSets.h | 13
-rw-r--r--  lib/Analysis/TargetLibraryInfo.cpp | 161
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 61
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp | 173
-rw-r--r--  lib/Analysis/TypeMetadataUtils.cpp | 3
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 647
-rw-r--r--  lib/Analysis/VectorUtils.cpp | 16
65 files changed, 6919 insertions, 3823 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index f931b6fc6523..84da76be98bb 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -53,7 +53,8 @@ using namespace llvm;
static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden,
cl::init(false));
-AAResults::AAResults(AAResults &&Arg) : TLI(Arg.TLI), AAs(std::move(Arg.AAs)) {
+AAResults::AAResults(AAResults &&Arg)
+ : TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) {
for (auto &AA : AAs)
AA->setAAResults(this);
}
@@ -69,6 +70,22 @@ AAResults::~AAResults() {
#endif
}
+bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check if the AA manager itself has been invalidated.
+ auto PAC = PA.getChecker<AAManager>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ return true; // The manager needs to be blown away, clear everything.
+
+ // Check all of the dependencies registered.
+ for (AnalysisKey *ID : AADeps)
+ if (Inv.invalidate(ID, F, PA))
+ return true;
+
+ // Everything we depend on is still fine, so are we. Nothing to invalidate.
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Default chaining methods
//===----------------------------------------------------------------------===//
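The invalidate() hook added above follows the new pass manager's standard idiom: a cached result survives a pass unless its own analysis was dropped from the preserved set, or one of the analyses it holds handles to was itself invalidated. A minimal standalone sketch of that idiom (hypothetical MyAnalysis/MyResult names; BasicAAResult::invalidate later in this diff is a real instance):

    bool MyResult::invalidate(Function &F, const PreservedAnalyses &PA,
                              FunctionAnalysisManager::Invalidator &Inv) {
      // Invalid if our own analysis was not preserved...
      auto PAC = PA.getChecker<MyAnalysis>();
      if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
        return true;
      // ...or if a dependency we rely on was invalidated.
      return Inv.invalidate<DominatorTreeAnalysis>(F, PA);
    }
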
@@ -141,7 +158,8 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
// Try to refine the mod-ref info further using other API entry points to the
// aggregate set of AA results.
auto MRB = getModRefBehavior(CS);
- if (MRB == FMRB_DoesNotAccessMemory)
+ if (MRB == FMRB_DoesNotAccessMemory ||
+ MRB == FMRB_OnlyAccessesInaccessibleMem)
return MRI_NoModRef;
if (onlyReadsMemory(MRB))
@@ -149,7 +167,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
else if (doesNotReadMemory(MRB))
Result = ModRefInfo(Result & MRI_Mod);
- if (onlyAccessesArgPointees(MRB)) {
+ if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) {
bool DoesAlias = false;
ModRefInfo AllArgsMask = MRI_NoModRef;
if (doesAccessArgPointees(MRB)) {
@@ -459,7 +477,8 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ (!CS.doesNotCapture(ArgNo) &&
+ ArgNo < CS.getNumArgOperands() && !CS.isByValArgument(ArgNo)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -512,7 +531,7 @@ bool AAResults::canInstructionRangeModRef(const Instruction &I1,
AAResults::Concept::~Concept() {}
// Provide a definition for the static object used to identify passes.
-char AAManager::PassID;
+AnalysisKey AAManager::Key;
namespace {
/// A wrapper pass for external alias analyses. This just squirrels away the
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index baf8f3f881db..4d6a6c9a30aa 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -88,7 +88,7 @@ static inline bool isInterestingPointer(Value *V) {
&& !isa<ConstantPointerNull>(V);
}
-PreservedAnalyses AAEvaluator::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses AAEvaluator::run(Function &F, FunctionAnalysisManager &AM) {
runInternal(F, AM.getResult<AAManager>(F));
return PreservedAnalyses::all();
}
diff --git a/lib/Analysis/AliasAnalysisSummary.cpp b/lib/Analysis/AliasAnalysisSummary.cpp
index f3f13df283db..2b4879453beb 100644
--- a/lib/Analysis/AliasAnalysisSummary.cpp
+++ b/lib/Analysis/AliasAnalysisSummary.cpp
@@ -7,25 +7,23 @@ namespace llvm {
namespace cflaa {
namespace {
-LLVM_CONSTEXPR unsigned AttrEscapedIndex = 0;
-LLVM_CONSTEXPR unsigned AttrUnknownIndex = 1;
-LLVM_CONSTEXPR unsigned AttrGlobalIndex = 2;
-LLVM_CONSTEXPR unsigned AttrCallerIndex = 3;
-LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 4;
-LLVM_CONSTEXPR unsigned AttrLastArgIndex = NumAliasAttrs;
-LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
+const unsigned AttrEscapedIndex = 0;
+const unsigned AttrUnknownIndex = 1;
+const unsigned AttrGlobalIndex = 2;
+const unsigned AttrCallerIndex = 3;
+const unsigned AttrFirstArgIndex = 4;
+const unsigned AttrLastArgIndex = NumAliasAttrs;
+const unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
-// NOTE: These aren't AliasAttrs because bitsets don't have a constexpr
-// ctor for some versions of MSVC that we support. We could maybe refactor,
-// but...
+// It would be *slightly* prettier if we changed these to AliasAttrs, but it
+// seems that both GCC and MSVC emit dynamic initializers for const bitsets.
using AliasAttr = unsigned;
-LLVM_CONSTEXPR AliasAttr AttrNone = 0;
-LLVM_CONSTEXPR AliasAttr AttrEscaped = 1 << AttrEscapedIndex;
-LLVM_CONSTEXPR AliasAttr AttrUnknown = 1 << AttrUnknownIndex;
-LLVM_CONSTEXPR AliasAttr AttrGlobal = 1 << AttrGlobalIndex;
-LLVM_CONSTEXPR AliasAttr AttrCaller = 1 << AttrCallerIndex;
-LLVM_CONSTEXPR AliasAttr ExternalAttrMask =
- AttrEscaped | AttrUnknown | AttrGlobal;
+const AliasAttr AttrNone = 0;
+const AliasAttr AttrEscaped = 1 << AttrEscapedIndex;
+const AliasAttr AttrUnknown = 1 << AttrUnknownIndex;
+const AliasAttr AttrGlobal = 1 << AttrGlobalIndex;
+const AliasAttr AttrCaller = 1 << AttrCallerIndex;
+const AliasAttr ExternalAttrMask = AttrEscaped | AttrUnknown | AttrGlobal;
}
AliasAttrs getAttrNone() { return AttrNone; }
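The dynamic-initializer concern in the new comment is about static initialization cost and ordering: on some toolchains LLVM supported at the time, std::bitset's constructor was not usable in constant expressions, so a namespace-scope const bitset is constructed by code run at program startup, while a plain integer constant is folded at compile time. An illustrative snippet (not part of the patch):

    #include <bitset>

    // May be emitted with a dynamic initializer on toolchains whose
    // std::bitset constructor is not constexpr:
    const std::bitset<32> AttrsAsBitset(0x7);

    // Always statically initialized:
    const unsigned AttrsAsUnsigned = 0x7;
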
@@ -91,7 +89,7 @@ instantiateExternalRelation(ExternalRelation ERelation, CallSite CS) {
auto To = instantiateInterfaceValue(ERelation.To, CS);
if (!To)
return None;
- return InstantiatedRelation{*From, *To};
+ return InstantiatedRelation{*From, *To, ERelation.Offset};
}
Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,
diff --git a/lib/Analysis/AliasAnalysisSummary.h b/lib/Analysis/AliasAnalysisSummary.h
index 43c0d4cb14f9..51a85f4e7061 100644
--- a/lib/Analysis/AliasAnalysisSummary.h
+++ b/lib/Analysis/AliasAnalysisSummary.h
@@ -99,7 +99,7 @@ AliasAttrs getExternallyVisibleAttrs(AliasAttrs);
//===----------------------------------------------------------------------===//
/// The maximum number of arguments we can put into a summary.
-LLVM_CONSTEXPR static unsigned MaxSupportedArgsInSummary = 50;
+static const unsigned MaxSupportedArgsInSummary = 50;
/// We use InterfaceValue to describe parameters/return value, as well as
/// potential memory locations that are pointed to by parameters/return value,
@@ -120,13 +120,66 @@ inline bool operator==(InterfaceValue LHS, InterfaceValue RHS) {
inline bool operator!=(InterfaceValue LHS, InterfaceValue RHS) {
return !(LHS == RHS);
}
+inline bool operator<(InterfaceValue LHS, InterfaceValue RHS) {
+ return LHS.Index < RHS.Index ||
+ (LHS.Index == RHS.Index && LHS.DerefLevel < RHS.DerefLevel);
+}
+inline bool operator>(InterfaceValue LHS, InterfaceValue RHS) {
+ return RHS < LHS;
+}
+inline bool operator<=(InterfaceValue LHS, InterfaceValue RHS) {
+ return !(RHS < LHS);
+}
+inline bool operator>=(InterfaceValue LHS, InterfaceValue RHS) {
+ return !(LHS < RHS);
+}
+
+// We use UnknownOffset to represent pointer offsets that cannot be determined
+// at compile time. Note that MemoryLocation::UnknownSize cannot be used here
+// because we require a signed value.
+static const int64_t UnknownOffset = INT64_MAX;
+
+inline int64_t addOffset(int64_t LHS, int64_t RHS) {
+ if (LHS == UnknownOffset || RHS == UnknownOffset)
+ return UnknownOffset;
+ // FIXME: Do we need to guard against integer overflow here?
+ return LHS + RHS;
+}
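UnknownOffset acts as an absorbing element for this arithmetic: once either operand is unknown, the sum is unknown. A tiny self-contained check of the helper's contract (duplicating the function above purely for illustration):

    #include <cassert>
    #include <cstdint>

    static const int64_t UnknownOffset = INT64_MAX;

    int64_t addOffset(int64_t LHS, int64_t RHS) {
      if (LHS == UnknownOffset || RHS == UnknownOffset)
        return UnknownOffset;
      return LHS + RHS;
    }

    int main() {
      assert(addOffset(4, 8) == 12);
      // Unknown absorbs in both directions:
      assert(addOffset(4, UnknownOffset) == UnknownOffset);
      assert(addOffset(UnknownOffset, 8) == UnknownOffset);
    }
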
/// We use ExternalRelation to describe an externally visible aliasing relations
/// between parameters/return value of a function.
struct ExternalRelation {
InterfaceValue From, To;
+ int64_t Offset;
};
+inline bool operator==(ExternalRelation LHS, ExternalRelation RHS) {
+ return LHS.From == RHS.From && LHS.To == RHS.To && LHS.Offset == RHS.Offset;
+}
+inline bool operator!=(ExternalRelation LHS, ExternalRelation RHS) {
+ return !(LHS == RHS);
+}
+inline bool operator<(ExternalRelation LHS, ExternalRelation RHS) {
+ if (LHS.From < RHS.From)
+ return true;
+ if (LHS.From > RHS.From)
+ return false;
+ if (LHS.To < RHS.To)
+ return true;
+ if (LHS.To > RHS.To)
+ return false;
+ return LHS.Offset < RHS.Offset;
+}
+inline bool operator>(ExternalRelation LHS, ExternalRelation RHS) {
+ return RHS < LHS;
+}
+inline bool operator<=(ExternalRelation LHS, ExternalRelation RHS) {
+ return !(RHS < LHS);
+}
+inline bool operator>=(ExternalRelation LHS, ExternalRelation RHS) {
+ return !(LHS < RHS);
+}
+
/// We use ExternalAttribute to describe an externally visible AliasAttrs
/// for parameters/return value.
struct ExternalAttribute {
@@ -174,6 +227,7 @@ inline bool operator>=(InstantiatedValue LHS, InstantiatedValue RHS) {
/// callsite
struct InstantiatedRelation {
InstantiatedValue From, To;
+ int64_t Offset;
};
Optional<InstantiatedRelation> instantiateExternalRelation(ExternalRelation,
CallSite);
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index d349ac51a9b9..701b0e1a5925 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -26,12 +26,19 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<unsigned>
+ SaturationThreshold("alias-set-saturation-threshold", cl::Hidden,
+ cl::init(250),
+ cl::desc("The maximum number of pointers may-alias "
+ "sets may contain before degradation"));
+
/// mergeSetIn - Merge the specified alias set into this alias set.
///
void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
assert(!AS.Forward && "Alias set is already forwarding!");
assert(!Forward && "This set is a forwarding set!!");
+ bool WasMustAlias = (Alias == SetMustAlias);
// Update the alias and access types of this set...
Access |= AS.Access;
Alias |= AS.Alias;
@@ -52,6 +59,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
Alias = SetMayAlias;
}
+ if (Alias == SetMayAlias) {
+ if (WasMustAlias)
+ AST.TotalMayAliasSetSize += size();
+ if (AS.Alias == SetMustAlias)
+ AST.TotalMayAliasSetSize += AS.size();
+ }
+
bool ASHadUnknownInsts = !AS.UnknownInsts.empty();
if (UnknownInsts.empty()) { // Merge call sites...
if (ASHadUnknownInsts) {
@@ -63,11 +77,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
AS.UnknownInsts.clear();
}
- AS.Forward = this; // Forward across AS now...
- addRef(); // AS is now pointing to us...
+ AS.Forward = this; // Forward across AS now...
+ addRef(); // AS is now pointing to us...
// Merge the list of constituent pointers...
if (AS.PtrList) {
+ SetSize += AS.size();
+ AS.SetSize = 0;
*PtrListEnd = AS.PtrList;
AS.PtrList->setPrevInList(PtrListEnd);
PtrListEnd = AS.PtrListEnd;
@@ -85,7 +101,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) {
Fwd->dropRef(*this);
AS->Forward = nullptr;
}
+
+ if (AS->Alias == AliasSet::SetMayAlias)
+ TotalMayAliasSetSize -= AS->size();
+
AliasSets.erase(AS);
+
}
void AliasSet::removeFromTracker(AliasSetTracker &AST) {
@@ -105,10 +126,13 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
AliasResult Result =
AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
MemoryLocation(Entry.getValue(), Size, AAInfo));
- if (Result != MustAlias)
+ if (Result != MustAlias) {
Alias = SetMayAlias;
- else // First entry of must alias must have maximum size!
+ AST.TotalMayAliasSetSize += size();
+ } else {
+ // First entry of must alias must have maximum size!
P->updateSizeAndAAInfo(Size, AAInfo);
+ }
assert(Result != NoAlias && "Cannot be part of must set!");
}
@@ -116,11 +140,16 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
Entry.updateSizeAndAAInfo(Size, AAInfo);
// Add it to the end of the list...
+ ++SetSize;
assert(*PtrListEnd == nullptr && "End of list is not null?");
*PtrListEnd = &Entry;
PtrListEnd = Entry.setPrevInList(PtrListEnd);
assert(*PtrListEnd == nullptr && "End of list is not null?");
- addRef(); // Entry points to alias set.
+ // Entry points to alias set.
+ addRef();
+
+ if (Alias == SetMayAlias)
+ AST.TotalMayAliasSetSize++;
}
void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
@@ -145,6 +174,9 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
const AAMDNodes &AAInfo,
AliasAnalysis &AA) const {
+ if (AliasAny)
+ return true;
+
if (Alias == SetMustAlias) {
assert(UnknownInsts.empty() && "Illegal must alias set!");
@@ -177,6 +209,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
AliasAnalysis &AA) const {
+
+ if (AliasAny)
+ return true;
+
if (!Inst->mayReadOrWriteMemory())
return false;
@@ -229,17 +265,6 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
return FoundSet;
}
-/// containsPointer - Return true if the specified location is represented by
-/// this alias set, false otherwise. This does not modify the AST object or
-/// alias sets.
-bool AliasSetTracker::containsPointer(const Value *Ptr, uint64_t Size,
- const AAMDNodes &AAInfo) const {
- for (const AliasSet &AS : *this)
- if (!AS.Forward && AS.aliasesPointer(Ptr, Size, AAInfo, AA))
- return true;
- return false;
-}
-
bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
for (const AliasSet &AS : *this)
if (!AS.Forward && AS.aliasesUnknownInst(Inst, AA))
@@ -261,16 +286,28 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
return FoundSet;
}
-
-
-
/// getAliasSetForPointer - Return the alias set that the specified pointer
/// lives in.
AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
- const AAMDNodes &AAInfo,
- bool *New) {
+ const AAMDNodes &AAInfo) {
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
+ if (AliasAnyAS) {
+ // At this point, the AST is saturated, so we only have one active alias
+ // set. That means we already know which alias set we want to return, and
+ // just need to add the pointer to that set to keep the data structure
+ // consistent.
+ // This, of course, means that we will never need a merge here.
+ if (Entry.hasAliasSet()) {
+ Entry.updateSizeAndAAInfo(Size, AAInfo);
+ assert(Entry.getAliasSet(*this) == AliasAnyAS &&
+ "Entry in saturated AST must belong to only alias set");
+ } else {
+ AliasAnyAS->addPointer(*this, Entry, Size, AAInfo);
+ }
+ return *AliasAnyAS;
+ }
+
// Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
// If the size changed, we may need to merge several alias sets.
@@ -290,68 +327,55 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
return *AS;
}
- if (New) *New = true;
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
return AliasSets.back();
}
-bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
- bool NewPtr;
- addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess, NewPtr);
- return NewPtr;
+void AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
+ addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess);
}
-
-bool AliasSetTracker::add(LoadInst *LI) {
+void AliasSetTracker::add(LoadInst *LI) {
if (isStrongerThanMonotonic(LI->getOrdering())) return addUnknown(LI);
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
AliasSet::AccessLattice Access = AliasSet::RefAccess;
- bool NewPtr;
const DataLayout &DL = LI->getModule()->getDataLayout();
AliasSet &AS = addPointer(LI->getOperand(0),
- DL.getTypeStoreSize(LI->getType()),
- AAInfo, Access, NewPtr);
+ DL.getTypeStoreSize(LI->getType()), AAInfo, Access);
if (LI->isVolatile()) AS.setVolatile();
- return NewPtr;
}
-bool AliasSetTracker::add(StoreInst *SI) {
+void AliasSetTracker::add(StoreInst *SI) {
if (isStrongerThanMonotonic(SI->getOrdering())) return addUnknown(SI);
AAMDNodes AAInfo;
SI->getAAMetadata(AAInfo);
AliasSet::AccessLattice Access = AliasSet::ModAccess;
- bool NewPtr;
const DataLayout &DL = SI->getModule()->getDataLayout();
Value *Val = SI->getOperand(0);
- AliasSet &AS = addPointer(SI->getOperand(1),
- DL.getTypeStoreSize(Val->getType()),
- AAInfo, Access, NewPtr);
+ AliasSet &AS = addPointer(
+ SI->getOperand(1), DL.getTypeStoreSize(Val->getType()), AAInfo, Access);
if (SI->isVolatile()) AS.setVolatile();
- return NewPtr;
}
-bool AliasSetTracker::add(VAArgInst *VAAI) {
+void AliasSetTracker::add(VAArgInst *VAAI) {
AAMDNodes AAInfo;
VAAI->getAAMetadata(AAInfo);
- bool NewPtr;
addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo,
- AliasSet::ModRefAccess, NewPtr);
- return NewPtr;
+ AliasSet::ModRefAccess);
}
-bool AliasSetTracker::add(MemSetInst *MSI) {
+void AliasSetTracker::add(MemSetInst *MSI) {
AAMDNodes AAInfo;
MSI->getAAMetadata(AAInfo);
- bool NewPtr;
uint64_t Len;
if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength()))
@@ -360,30 +384,61 @@ bool AliasSetTracker::add(MemSetInst *MSI) {
Len = MemoryLocation::UnknownSize;
AliasSet &AS =
- addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess, NewPtr);
+ addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
if (MSI->isVolatile())
AS.setVolatile();
- return NewPtr;
}
-bool AliasSetTracker::addUnknown(Instruction *Inst) {
- if (isa<DbgInfoIntrinsic>(Inst))
- return true; // Ignore DbgInfo Intrinsics.
+void AliasSetTracker::add(MemTransferInst *MTI) {
+ AAMDNodes AAInfo;
+ MTI->getAAMetadata(AAInfo);
+
+ uint64_t Len;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Len = C->getZExtValue();
+ else
+ Len = MemoryLocation::UnknownSize;
+
+ AliasSet &ASSrc =
+ addPointer(MTI->getRawSource(), Len, AAInfo, AliasSet::RefAccess);
+ if (MTI->isVolatile())
+ ASSrc.setVolatile();
+
+ AliasSet &ASDst =
+ addPointer(MTI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
+ if (MTI->isVolatile())
+ ASDst.setVolatile();
+}
+
+void AliasSetTracker::addUnknown(Instruction *Inst) {
+ if (isa<DbgInfoIntrinsic>(Inst))
+ return; // Ignore DbgInfo Intrinsics.
+
+ if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // These intrinsics will show up as affecting memory, but they are just
+ // markers.
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ // FIXME: Add lifetime/invariant intrinsics (See: PR30807).
+ case Intrinsic::assume:
+ return;
+ }
+ }
if (!Inst->mayReadOrWriteMemory())
- return true; // doesn't alias anything
+ return; // doesn't alias anything
AliasSet *AS = findAliasSetForUnknownInst(Inst);
if (AS) {
AS->addUnknownInst(Inst, AA);
- return false;
+ return;
}
AliasSets.push_back(new AliasSet());
AS = &AliasSets.back();
AS->addUnknownInst(Inst, AA);
- return true;
}
-bool AliasSetTracker::add(Instruction *I) {
+void AliasSetTracker::add(Instruction *I) {
// Dispatch to one of the other add methods.
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return add(LI);
@@ -393,8 +448,9 @@ bool AliasSetTracker::add(Instruction *I) {
return add(VAAI);
if (MemSetInst *MSI = dyn_cast<MemSetInst>(I))
return add(MSI);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I))
+ return add(MTI);
return addUnknown(I);
- // FIXME: add support of memcpy and memmove.
}
void AliasSetTracker::add(BasicBlock &BB) {
@@ -418,134 +474,15 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
add(AS.UnknownInsts[i]);
// Loop over all of the pointers in this alias set.
- bool X;
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
- AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
- ASI.getAAInfo(),
- (AliasSet::AccessLattice)AS.Access, X);
+ AliasSet &NewAS =
+ addPointer(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo(),
+ (AliasSet::AccessLattice)AS.Access);
if (AS.isVolatile()) NewAS.setVolatile();
}
}
}
-/// remove - Remove the specified (potentially non-empty) alias set from the
-/// tracker.
-void AliasSetTracker::remove(AliasSet &AS) {
- // Drop all call sites.
- if (!AS.UnknownInsts.empty())
- AS.dropRef(*this);
- AS.UnknownInsts.clear();
-
- // Clear the alias set.
- unsigned NumRefs = 0;
- while (!AS.empty()) {
- AliasSet::PointerRec *P = AS.PtrList;
-
- Value *ValToRemove = P->getValue();
-
- // Unlink and delete entry from the list of values.
- P->eraseFromList();
-
- // Remember how many references need to be dropped.
- ++NumRefs;
-
- // Finally, remove the entry.
- PointerMap.erase(ValToRemove);
- }
-
- // Stop using the alias set, removing it.
- AS.RefCount -= NumRefs;
- if (AS.RefCount == 0)
- AS.removeFromTracker(*this);
-}
-
-bool
-AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
- AliasSet *AS = mergeAliasSetsForPointer(Ptr, Size, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(LoadInst *LI) {
- const DataLayout &DL = LI->getModule()->getDataLayout();
- uint64_t Size = DL.getTypeStoreSize(LI->getType());
-
- AAMDNodes AAInfo;
- LI->getAAMetadata(AAInfo);
-
- AliasSet *AS = mergeAliasSetsForPointer(LI->getOperand(0), Size, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(StoreInst *SI) {
- const DataLayout &DL = SI->getModule()->getDataLayout();
- uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType());
-
- AAMDNodes AAInfo;
- SI->getAAMetadata(AAInfo);
-
- AliasSet *AS = mergeAliasSetsForPointer(SI->getOperand(1), Size, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(VAArgInst *VAAI) {
- AAMDNodes AAInfo;
- VAAI->getAAMetadata(AAInfo);
-
- AliasSet *AS = mergeAliasSetsForPointer(VAAI->getOperand(0),
- MemoryLocation::UnknownSize, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(MemSetInst *MSI) {
- AAMDNodes AAInfo;
- MSI->getAAMetadata(AAInfo);
- uint64_t Len;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength()))
- Len = C->getZExtValue();
- else
- Len = MemoryLocation::UnknownSize;
-
- AliasSet *AS = mergeAliasSetsForPointer(MSI->getRawDest(), Len, AAInfo);
- if (!AS)
- return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::removeUnknown(Instruction *I) {
- if (!I->mayReadOrWriteMemory())
- return false; // doesn't alias anything
-
- AliasSet *AS = findAliasSetForUnknownInst(I);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(Instruction *I) {
- // Dispatch to one of the other remove methods...
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return remove(LI);
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return remove(SI);
- if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
- return remove(VAAI);
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(I))
- return remove(MSI);
- return removeUnknown(I);
- // FIXME: add support of memcpy and memmove.
-}
-
-
// deleteValue method - This method is used to remove a pointer value from the
// AliasSetTracker entirely. It should be used when an instruction is deleted
// from the program to update the AST. If you don't use this, you would have
@@ -575,6 +512,11 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
// Unlink and delete from the list of values.
PtrValEnt->eraseFromList();
+
+ if (AS->Alias == AliasSet::SetMayAlias) {
+ AS->SetSize--;
+ TotalMayAliasSetSize--;
+ }
// Stop using the alias set.
AS->dropRef(*this);
@@ -597,15 +539,68 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
AliasSet::PointerRec &Entry = getEntryFor(To);
if (Entry.hasAliasSet()) return; // Already in the tracker!
- // Add it to the alias set it aliases...
+ // getEntryFor above may invalidate iterator \c I, so reinitialize it.
I = PointerMap.find_as(From);
+ // Add it to the alias set it aliases...
AliasSet *AS = I->second->getAliasSet(*this);
AS->addPointer(*this, Entry, I->second->getSize(),
I->second->getAAInfo(),
true);
}
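The re-lookup of I above guards against a classic hash-map hazard: getEntryFor may insert into PointerMap, and an insertion can grow the table and invalidate outstanding iterators. A generic standalone sketch of the same defensive pattern (hypothetical; uses std::unordered_map in place of LLVM's DenseMap, which likewise invalidates iterators when it grows):

    #include <unordered_map>

    void copyEntry(std::unordered_map<int, int> &M, int From, int To) {
      auto I = M.find(From);
      M.emplace(To, 0);   // may rehash: I must not be trusted afterwards
      I = M.find(From);   // reinitialize before dereferencing
      if (I != M.end())
        M[To] = I->second;
    }
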
+AliasSet &AliasSetTracker::mergeAllAliasSets() {
+ assert(!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold) &&
+ "Full merge should happen once, when the saturation threshold is "
+ "reached");
+
+ // Collect all alias sets, so that we can drop references with impunity
+ // without worrying about iterator invalidation.
+ std::vector<AliasSet *> ASVector;
+ ASVector.reserve(SaturationThreshold);
+ for (iterator I = begin(), E = end(); I != E; I++)
+ ASVector.push_back(&*I);
+
+ // Copy all instructions and pointers into a new set, and forward all other
+ // sets to it.
+ AliasSets.push_back(new AliasSet());
+ AliasAnyAS = &AliasSets.back();
+ AliasAnyAS->Alias = AliasSet::SetMayAlias;
+ AliasAnyAS->Access = AliasSet::ModRefAccess;
+ AliasAnyAS->AliasAny = true;
+
+ for (auto Cur : ASVector) {
+
+ // If Cur was already forwarding, just forward to the new AS instead.
+ AliasSet *FwdTo = Cur->Forward;
+ if (FwdTo) {
+ Cur->Forward = AliasAnyAS;
+ AliasAnyAS->addRef();
+ FwdTo->dropRef(*this);
+ continue;
+ }
+
+ // Otherwise, perform the actual merge.
+ AliasAnyAS->mergeSetIn(*Cur, *this);
+ }
+
+ return *AliasAnyAS;
+}
+
+AliasSet &AliasSetTracker::addPointer(Value *P, uint64_t Size,
+ const AAMDNodes &AAInfo,
+ AliasSet::AccessLattice E) {
+
+ AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo);
+ AS.Access |= E;
+
+ if (!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold)) {
+ // The AST is now saturated. From here on, we conservatively consider all
+ // pointers to alias each-other.
+ return mergeAllAliasSets();
+ }
+ return AS;
+}
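Together with the -alias-set-saturation-threshold option added at the top of this file, this gives the tracker a graceful degradation mode: once the combined size of all may-alias sets exceeds the threshold (250 by default), every pointer is folded into a single AliasAny set that conservatively answers every query with "may alias", trading precision for bounded compile time. A condensed sketch of the saturation bookkeeping (hypothetical, simplified from the code above):

    #include <cstdint>

    struct SaturatingTracker {
      uint64_t TotalMayAliasSetSize = 0;
      bool Saturated = false;
      static const uint64_t Threshold = 250; // -alias-set-saturation-threshold

      void notePointerInMayAliasSet() {
        if (Saturated)
          return;                  // already one big set; stop counting
        if (++TotalMayAliasSetSize > Threshold)
          Saturated = true;        // fold everything into one AliasAny set
      }
    };
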
//===----------------------------------------------------------------------===//
// AliasSet/AliasSetTracker Printing Support
@@ -700,7 +695,7 @@ namespace {
bool runOnFunction(Function &F) override {
auto &AAWP = getAnalysis<AAResultsWrapperPass>();
Tracker = new AliasSetTracker(AAWP.getAAResults());
-
+ errs() << "Alias sets for function '" << F.getName() << "':\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
Tracker->add(&*I);
Tracker->print(errs());
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index c04447ca58c9..0e7cf402cdb5 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -30,10 +30,10 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCallGraphPrinterLegacyPassPass(Registry);
initializeCallGraphViewerPass(Registry);
initializeCostModelAnalysisPass(Registry);
- initializeCFGViewerPass(Registry);
- initializeCFGPrinterPass(Registry);
- initializeCFGOnlyViewerPass(Registry);
- initializeCFGOnlyPrinterPass(Registry);
+ initializeCFGViewerLegacyPassPass(Registry);
+ initializeCFGPrinterLegacyPassPass(Registry);
+ initializeCFGOnlyViewerLegacyPassPass(Registry);
+ initializeCFGOnlyPrinterLegacyPassPass(Registry);
initializeCFLAndersAAWrapperPassPass(Registry);
initializeCFLSteensAAWrapperPassPass(Registry);
initializeDependenceAnalysisWrapperPassPass(Registry);
@@ -54,6 +54,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeIVUsersWrapperPassPass(Registry);
initializeInstCountPass(Registry);
initializeIntervalPartitionPass(Registry);
+ initializeLazyBranchProbabilityInfoPassPass(Registry);
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
initializeLintPass(Registry);
@@ -76,6 +77,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeTargetTransformInfoWrapperPassPass(Registry);
initializeTypeBasedAAWrapperPassPass(Registry);
initializeScopedNoAliasAAWrapperPassPass(Registry);
+ initializeLCSSAVerificationPassPass(Registry);
}
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index ca71644757f0..3c518034ba62 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -74,10 +74,10 @@ void AssumptionCache::registerAssumption(CallInst *CI) {
#endif
}
-char AssumptionAnalysis::PassID;
+AnalysisKey AssumptionAnalysis::Key;
PreservedAnalyses AssumptionPrinterPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
OS << "Cached assumptions for function: " << F.getName() << "\n";
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 43d5c3ccf907..e8d86bdbca5b 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -63,6 +63,21 @@ const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
// depth otherwise the algorithm in aliasGEP will assert.
static const unsigned MaxLookupSearchDepth = 6;
+bool BasicAAResult::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // We don't care if this analysis itself is preserved, it has no state. But
+ // we need to check that the analyses it depends on have been. Note that we
+ // may be created without handles to some analyses and in that case don't
+ // depend on them.
+ if (Inv.invalidate<AssumptionAnalysis>(F, PA) ||
+ (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA)) ||
+ (LI && Inv.invalidate<LoopAnalysis>(F, PA)))
+ return true;
+
+ // Otherwise this analysis result remains valid.
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Useful predicates
//===----------------------------------------------------------------------===//
@@ -227,7 +242,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
Offset = 0;
return V;
}
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case Instruction::Add:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
@@ -275,7 +290,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL,
Depth + 1, AC, DT, NSW, NUW);
- // zext(zext(%x)) == zext(%x), and similiarly for sext; we'll handle this
+ // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this
// by just incrementing the number of bits we've extended by.
unsigned ExtendedBy = NewWidth - SmallWidth;
@@ -409,11 +424,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
unsigned PointerSize = DL.getPointerSizeInBits(AS);
+ // Assume all GEP operands are constants until proven otherwise.
+ bool GepHasConstantOffset = true;
for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
- I != E; ++I) {
+ I != E; ++I, ++GTI) {
const Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
- if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0)
@@ -429,11 +446,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (CIdx->isZero())
continue;
Decomposed.OtherOffset +=
- DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
+ DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue();
continue;
}
- uint64_t Scale = DL.getTypeAllocSize(*GTI);
+ GepHasConstantOffset = false;
+
+ uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType());
unsigned ZExtBits = 0, SExtBits = 0;
// If the integer type is smaller than the pointer size, it is implicitly
@@ -458,7 +477,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
- if (Decomposed.VarIndices[i].V == Index &&
+ if (Decomposed.VarIndices[i].V == Index &&
Decomposed.VarIndices[i].ZExtBits == ZExtBits &&
Decomposed.VarIndices[i].SExtBits == SExtBits) {
Scale += Decomposed.VarIndices[i].Scale;
@@ -479,10 +498,12 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
}
// Take care of wrap-arounds
- Decomposed.StructOffset =
- adjustToPointerSize(Decomposed.StructOffset, PointerSize);
- Decomposed.OtherOffset =
- adjustToPointerSize(Decomposed.OtherOffset, PointerSize);
+ if (GepHasConstantOffset) {
+ Decomposed.StructOffset =
+ adjustToPointerSize(Decomposed.StructOffset, PointerSize);
+ Decomposed.OtherOffset =
+ adjustToPointerSize(Decomposed.OtherOffset, PointerSize);
+ }
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
@@ -603,6 +624,10 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
if (F->onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
+ else if (F->onlyAccessesInaccessibleMemory())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem);
+ else if (F->onlyAccessesInaccessibleMemOrArgMem())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem);
return Min;
}
@@ -732,7 +757,8 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(OperandNo) && !CS.isByValArgument(OperandNo)))
+ (!CS.doesNotCapture(OperandNo) &&
+ OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -765,6 +791,31 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
return MRI_NoModRef;
}
+ // The semantics of memcpy intrinsics forbid overlap between their respective
+ // operands, i.e., source and destination of any given memcpy must no-alias.
+ // If Loc must-aliases either one of these two locations, then it necessarily
+ // no-aliases the other.
+ if (auto *Inst = dyn_cast<MemCpyInst>(CS.getInstruction())) {
+ AliasResult SrcAA, DestAA;
+
+ if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
+ Loc)) == MustAlias)
+ // Loc is exactly the memcpy source thus disjoint from memcpy dest.
+ return MRI_Ref;
+ if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst),
+ Loc)) == MustAlias)
+ // The converse case.
+ return MRI_Mod;
+
+ // It's also possible for Loc to alias both src and dest, or neither.
+ ModRefInfo rv = MRI_NoModRef;
+ if (SrcAA != NoAlias)
+ rv = static_cast<ModRefInfo>(rv | MRI_Ref);
+ if (DestAA != NoAlias)
+ rv = static_cast<ModRefInfo>(rv | MRI_Mod);
+ return rv;
+ }
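A standalone restatement of the rule implemented above: because memcpy's source and destination may not overlap, a location that must-alias one operand is disjoint from the other, and only the remaining may-alias operands contribute Ref/Mod bits (hypothetical sketch with local enum definitions standing in for LLVM's):

    enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };
    enum ModRefInfo { MRI_NoModRef = 0, MRI_Ref = 1, MRI_Mod = 2,
                      MRI_ModRef = MRI_Ref | MRI_Mod };

    ModRefInfo memcpyModRef(AliasResult SrcAA, AliasResult DestAA) {
      if (SrcAA == MustAlias)
        return MRI_Ref;  // Loc is exactly the source: only read.
      if (DestAA == MustAlias)
        return MRI_Mod;  // Loc is exactly the destination: only written.
      int RV = MRI_NoModRef;
      if (SrcAA != NoAlias)
        RV |= MRI_Ref;   // may overlap the source
      if (DestAA != NoAlias)
        RV |= MRI_Mod;   // may overlap the destination
      return static_cast<ModRefInfo>(RV);
    }
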
+
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
@@ -781,6 +832,32 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
return MRI_Ref;
+ // Like assumes, invariant.start intrinsics were also marked as arbitrarily
+ // writing so that proper control dependencies are maintained but they never
+ // mod any particular memory location visible to the IR.
+ // *Unlike* assumes (which are now modeled as NoModRef), invariant.start
+ // intrinsic is now modeled as reading memory. This prevents hoisting the
+ // invariant.start intrinsic over stores. Consider:
+ // *ptr = 40;
+ // *ptr = 50;
+ // invariant_start(ptr)
+ // int val = *ptr;
+ // print(val);
+ //
+ // This cannot be transformed to:
+ //
+ // *ptr = 40;
+ // invariant_start(ptr)
+ // *ptr = 50;
+ // int val = *ptr;
+ // print(val);
+ //
+ // The transformation will cause the second store to be ignored (based on
+ // rules of invariant.start) and print 40, while the first program always
+ // prints 50.
+ if (isIntrinsicCall(CS, Intrinsic::invariant_start))
+ return MRI_Ref;
+
// The AAResultBase base class has some smarts, lets use them.
return AAResultBase::getModRefInfo(CS, Loc);
}
@@ -1114,7 +1191,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return MayAlias;
AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
- AAMDNodes(), V2, V2Size, V2AAInfo);
+ AAMDNodes(), V2, V2Size, V2AAInfo,
+ nullptr, UnderlyingV2);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1251,7 +1329,8 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
const AAMDNodes &SIAAInfo,
const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+ const AAMDNodes &V2AAInfo,
+ const Value *UnderV2) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
@@ -1269,12 +1348,14 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
+ SISize, SIAAInfo, UnderV2);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo,
+ UnderV2);
return MergeAliasResults(ThisAlias, Alias);
}
@@ -1282,8 +1363,8 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
/// another.
AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
- uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+ uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderV2) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
@@ -1362,7 +1443,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
PNSize = MemoryLocation::UnknownSize;
AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0],
+ PNSize, PNAAInfo, UnderV2);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
@@ -1375,7 +1457,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
Value *V = V1Srcs[i];
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, UnderV2);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1388,7 +1470,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
/// array references.
AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
AAMDNodes V1AAInfo, const Value *V2,
- uint64_t V2Size, AAMDNodes V2AAInfo) {
+ uint64_t V2Size, AAMDNodes V2AAInfo,
+ const Value *O1, const Value *O2) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size == 0 || V2Size == 0)
@@ -1416,8 +1499,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
- const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+ if (O1 == nullptr)
+ O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
+
+ if (O2 == nullptr)
+ O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1500,23 +1586,26 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
std::swap(V1, V2);
+ std::swap(O1, O2);
std::swap(V1Size, V2Size);
std::swap(V1AAInfo, V2AAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
- AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
+ V2, V2Size, V2AAInfo, O2);
if (Result != MayAlias)
return AliasCache[Locs] = Result;
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
std::swap(V1, V2);
+ std::swap(O1, O2);
std::swap(V1Size, V2Size);
std::swap(V1AAInfo, V2AAInfo);
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
AliasResult Result =
- aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2);
if (Result != MayAlias)
return AliasCache[Locs] = Result;
}
@@ -1667,9 +1756,9 @@ bool BasicAAResult::constantOffsetHeuristic(
// BasicAliasAnalysis Pass
//===----------------------------------------------------------------------===//
-char BasicAA::PassID;
+AnalysisKey BasicAA::Key;
-BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> &AM) {
+BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) {
return BasicAAResult(F.getParent()->getDataLayout(),
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 1dd8f4fdfcfe..4cdbe4d0fcf6 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -39,8 +39,7 @@ static cl::opt<GVDAGType> ViewBlockFreqPropagationDAG(
"display a graph using the raw "
"integer fractional block frequency representation."),
clEnumValN(GVDT_Count, "count", "display a graph using the real "
- "profile count if available."),
- clEnumValEnd));
+ "profile count if available.")));
cl::opt<std::string>
ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden,
@@ -60,24 +59,22 @@ namespace llvm {
template <>
struct GraphTraits<BlockFrequencyInfo *> {
- typedef const BasicBlock NodeType;
+ typedef const BasicBlock *NodeRef;
typedef succ_const_iterator ChildIteratorType;
- typedef Function::const_iterator nodes_iterator;
+ typedef pointer_iterator<Function::const_iterator> nodes_iterator;
- static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
+ static NodeRef getEntryNode(const BlockFrequencyInfo *G) {
return &G->getFunction()->front();
}
- static ChildIteratorType child_begin(const NodeType *N) {
+ static ChildIteratorType child_begin(const NodeRef N) {
return succ_begin(N);
}
- static ChildIteratorType child_end(const NodeType *N) {
- return succ_end(N);
- }
+ static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
- return G->getFunction()->begin();
+ return nodes_iterator(G->getFunction()->begin());
}
static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
- return G->getFunction()->end();
+ return nodes_iterator(G->getFunction()->end());
}
};
@@ -162,6 +159,13 @@ BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB) const {
return BFI->getBlockProfileCount(*getFunction(), BB);
}
+Optional<uint64_t>
+BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
+ if (!BFI)
+ return None;
+ return BFI->getProfileCountFromFreq(*getFunction(), Freq);
+}
+
void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
assert(BFI && "Expected analysis to be available");
BFI->setBlockFreq(BB, Freq);
@@ -248,9 +252,9 @@ bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) {
return false;
}
-char BlockFrequencyAnalysis::PassID;
+AnalysisKey BlockFrequencyAnalysis::Key;
BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
BlockFrequencyInfo BFI;
BFI.calculate(F, AM.getResult<BranchProbabilityAnalysis>(F),
AM.getResult<LoopAnalysis>(F));
@@ -258,7 +262,7 @@ BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F,
}
PreservedAnalyses
-BlockFrequencyPrinterPass::run(Function &F, AnalysisManager<Function> &AM) {
+BlockFrequencyPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
OS << "Printing analysis results of BFI for function "
<< "'" << F.getName() << "':"
<< "\n";
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index c2039e1dec2b..9850e02fca22 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -533,12 +533,18 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
Optional<uint64_t>
BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
const BlockNode &Node) const {
+ return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
+}
+
+Optional<uint64_t>
+BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
+ uint64_t Freq) const {
auto EntryCount = F.getEntryCount();
if (!EntryCount)
return None;
// Use 128 bit APInt to do the arithmetic to avoid overflow.
APInt BlockCount(128, EntryCount.getValue());
- APInt BlockFreq(128, getBlockFreq(Node).getFrequency());
+ APInt BlockFreq(128, Freq);
APInt EntryFreq(128, getEntryFreq());
BlockCount *= BlockFreq;
BlockCount = BlockCount.udiv(EntryFreq);
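The arithmetic here is Count = EntryCount * Freq / EntryFreq, with the intermediate product widened to 128 bits so it cannot overflow uint64_t. A standalone sketch of the same computation (hypothetical; uses the GCC/Clang __int128 extension where LLVM uses APInt):

    #include <cstdint>

    uint64_t profileCountFromFreq(uint64_t EntryCount, uint64_t Freq,
                                  uint64_t EntryFreq) {
      // Widen before multiplying: EntryCount * Freq can exceed 64 bits.
      unsigned __int128 Product = (unsigned __int128)EntryCount * Freq;
      return (uint64_t)(Product / EntryFreq);
    }

    // E.g. EntryCount = 1000, Freq = 8, EntryFreq = 16 yields a count of 500.
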
@@ -622,15 +628,12 @@ namespace llvm {
template <> struct GraphTraits<IrreducibleGraph> {
typedef bfi_detail::IrreducibleGraph GraphT;
- typedef const GraphT::IrrNode NodeType;
typedef const GraphT::IrrNode *NodeRef;
typedef GraphT::IrrNode::iterator ChildIteratorType;
- static const NodeType *getEntryNode(const GraphT &G) {
- return G.StartIrr;
- }
- static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
- static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
+ static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
};
} // end namespace llvm
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index d802552d4e29..3eabb780398c 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -162,12 +162,12 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
return true;
}
- BranchProbability UnreachableProb(UR_TAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
- UnreachableEdges.size());
- BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
- ReachableEdges.size());
+ auto UnreachableProb = BranchProbability::getBranchProbability(
+ UR_TAKEN_WEIGHT, (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
+ uint64_t(UnreachableEdges.size()));
+ auto ReachableProb = BranchProbability::getBranchProbability(
+ UR_NONTAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * uint64_t(ReachableEdges.size()));
for (unsigned SuccIdx : UnreachableEdges)
setEdgeProbability(BB, SuccIdx, UnreachableProb);
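The uint64_t casts are the substance of this change: the weight sum times the edge count was previously evaluated in 32-bit arithmetic and could wrap for blocks with very many successors before BranchProbability ever saw it. A minimal demonstration of the wrap (hypothetical weight values):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t WeightSum = 1u << 20; // e.g. taken + non-taken weight sum
      uint32_t NumEdges  = 1u << 13; // a switch with many successors
      uint32_t Narrow = WeightSum * NumEdges;         // wraps mod 2^32 -> 0
      uint64_t Wide = uint64_t(WeightSum) * NumEdges; // 2^33, as intended
      printf("narrow=%u wide=%llu\n", Narrow, (unsigned long long)Wide);
      return 0;
    }
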
@@ -279,6 +279,16 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
}
}
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ // If the terminator is an InvokeInst, consider only the normal destination
+ // block.
+ if (PostDominatedByColdCall.count(II->getNormalDest()))
+ PostDominatedByColdCall.insert(BB);
+ // Return false here so that edge weights for InvokeInst could be decided
+ // in calcInvokeHeuristics().
+ return false;
+ }
+
// Skip probabilities if this block has a single successor.
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
return false;
@@ -290,12 +300,12 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
return true;
}
- BranchProbability ColdProb(CC_TAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
- ColdEdges.size());
- BranchProbability NormalProb(CC_NONTAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
- NormalEdges.size());
+ auto ColdProb = BranchProbability::getBranchProbability(
+ CC_TAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(ColdEdges.size()));
+ auto NormalProb = BranchProbability::getBranchProbability(
+ CC_NONTAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size()));
for (unsigned SuccIdx : ColdEdges)
setEdgeProbability(BB, SuccIdx, ColdProb);
@@ -701,16 +711,16 @@ void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
BPI.print(OS);
}
-char BranchProbabilityAnalysis::PassID;
+AnalysisKey BranchProbabilityAnalysis::Key;
BranchProbabilityInfo
-BranchProbabilityAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
BranchProbabilityInfo BPI;
BPI.calculate(F, AM.getResult<LoopAnalysis>(F));
return BPI;
}
PreservedAnalyses
-BranchProbabilityPrinterPass::run(Function &F, AnalysisManager<Function> &AM) {
+BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
OS << "Printing analysis results of BPI for function "
<< "'" << F.getName() << "':"
<< "\n";
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index c86f1f55954b..a85af6c9c93f 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -23,10 +23,10 @@
using namespace llvm;
namespace {
- struct CFGViewer : public FunctionPass {
+ struct CFGViewerLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGViewer() : FunctionPass(ID) {
- initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ CFGViewerLegacyPass() : FunctionPass(ID) {
+ initializeCFGViewerLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@@ -42,14 +42,21 @@ namespace {
};
}
-char CFGViewer::ID = 0;
-INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
+char CFGViewerLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGViewerLegacyPass, "view-cfg", "View CFG of function", false, true)
+
+PreservedAnalyses CFGViewerPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ F.viewCFG();
+ return PreservedAnalyses::all();
+}
+
namespace {
- struct CFGOnlyViewer : public FunctionPass {
+ struct CFGOnlyViewerLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGOnlyViewer() : FunctionPass(ID) {
- initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ CFGOnlyViewerLegacyPass() : FunctionPass(ID) {
+ initializeCFGOnlyViewerLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@@ -65,29 +72,39 @@ namespace {
};
}
-char CFGOnlyViewer::ID = 0;
-INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+char CFGOnlyViewerLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGOnlyViewerLegacyPass, "view-cfg-only",
"View CFG of function (with no function bodies)", false, true)
+PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ F.viewCFGOnly();
+ return PreservedAnalyses::all();
+}
+
+static void writeCFGToDotFile(Function &F) {
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
+ errs() << "Writing '" << Filename << "'...";
+
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
+ WriteGraph(File, (const Function*)&F);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+}
+
namespace {
- struct CFGPrinter : public FunctionPass {
+ struct CFGPrinterLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGPrinter() : FunctionPass(ID) {
- initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+ CFGPrinterLegacyPass() : FunctionPass(ID) {
+ initializeCFGPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- std::string Filename = ("cfg." + F.getName() + ".dot").str();
- errs() << "Writing '" << Filename << "'...";
-
- std::error_code EC;
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
-
- if (!EC)
- WriteGraph(File, (const Function*)&F);
- else
- errs() << " error opening file for writing!";
- errs() << "\n";
+ writeCFGToDotFile(F);
return false;
}
@@ -99,29 +116,25 @@ namespace {
};
}
-char CFGPrinter::ID = 0;
-INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+char CFGPrinterLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file",
false, true)
+PreservedAnalyses CFGPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ writeCFGToDotFile(F);
+ return PreservedAnalyses::all();
+}
+
namespace {
- struct CFGOnlyPrinter : public FunctionPass {
+ struct CFGOnlyPrinterLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGOnlyPrinter() : FunctionPass(ID) {
- initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ CFGOnlyPrinterLegacyPass() : FunctionPass(ID) {
+ initializeCFGOnlyPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- std::string Filename = ("cfg." + F.getName() + ".dot").str();
- errs() << "Writing '" << Filename << "'...";
-
- std::error_code EC;
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
-
- if (!EC)
- WriteGraph(File, (const Function*)&F, true);
- else
- errs() << " error opening file for writing!";
- errs() << "\n";
+ writeCFGToDotFile(F);
return false;
}
void print(raw_ostream &OS, const Module* = nullptr) const override {}
@@ -132,11 +145,17 @@ namespace {
};
}
-char CFGOnlyPrinter::ID = 0;
-INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+char CFGOnlyPrinterLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGOnlyPrinterLegacyPass, "dot-cfg-only",
"Print CFG of function to 'dot' file (with no function bodies)",
false, true)
+PreservedAnalyses CFGOnlyPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ writeCFGToDotFile(F);
+ return PreservedAnalyses::all();
+}
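+
+// Minimal usage sketch for the new-PM printer/viewer passes above (assumes
+// a FunctionPassManager configured elsewhere; illustrative only):
+//   FunctionPassManager FPM;
+//   FPM.addPass(CFGPrinterPass());  // writes cfg.<function>.dot
+//   FPM.addPass(CFGViewerPass());   // opens a viewer window if available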
+
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
/// program, displaying the CFG of the current function. This depends on there
@@ -155,11 +174,11 @@ void Function::viewCFGOnly() const {
ViewGraph(this, "cfg" + getName(), true);
}
-FunctionPass *llvm::createCFGPrinterPass () {
- return new CFGPrinter();
+FunctionPass *llvm::createCFGPrinterLegacyPassPass () {
+ return new CFGPrinterLegacyPass();
}
-FunctionPass *llvm::createCFGOnlyPrinterPass () {
- return new CFGOnlyPrinter();
+FunctionPass *llvm::createCFGOnlyPrinterLegacyPassPass () {
+ return new CFGOnlyPrinterLegacyPass();
}
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 7d5bd94133a7..e48ff230f43c 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -27,12 +27,23 @@
// codes: all we do here is to selectively expand the transitive closure by
// discarding edges that are not recognized by the state machine.
//
-// There is one difference between our current implementation and the one
-// described in the paper: out algorithm eagerly computes all alias pairs after
-// the CFLGraph is built, while in the paper the authors did the computation in
-// a demand-driven fashion. We did not implement the demand-driven algorithm due
-// to the additional coding complexity and higher memory profile, but if we
-// found it necessary we may switch to it eventually.
+// There are two differences between our current implementation and the one
+// described in the paper:
+// - Our algorithm eagerly computes all alias pairs after the CFLGraph is built,
+// while in the paper the authors did the computation in a demand-driven
+// fashion. We did not implement the demand-driven algorithm due to the
+// additional coding complexity and higher memory profile, but if we found it
+// necessary we may switch to it eventually.
+// - In the paper the authors use a state machine that does not distinguish
+// value reads from value writes. For example, if Y is reachable from X at state
+// S3, it may be the case that X is written into Y, or it may be the case that
+// there's a third value Z that writes into both X and Y. To make that
+// distinction (which is crucial in building function summary as well as
+// retrieving mod-ref info), we choose to duplicate some of the states in the
+// paper's proposed state machine. The duplication does not change the set the
+// machine accepts. Given a pair of reachable values, it only provides more
+// detailed information on which value is being written into and which is being
+// read from.
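+//
+// As a hedged illustration of the third-value case (the names below are
+// invented for exposition, not taken from the paper):
+//   int *Z = ...;
+//   int *X = Z; // Z is written into X
+//   int *Y = Z; // Z is written into Y
+// Here X and Y alias through Z, yet neither is written into the other; the
+// duplicated states let the analysis tell these two situations apart.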
//
//===----------------------------------------------------------------------===//
@@ -71,16 +82,65 @@ static const Function *parentFunctionOfValue(const Value *Val) {
namespace {
enum class MatchState : uint8_t {
- FlowFrom = 0, // S1 in the paper
- FlowFromMemAlias, // S2 in the paper
- FlowTo, // S3 in the paper
- FlowToMemAlias // S4 in the paper
+ // The following state represents S1 in the paper.
+ FlowFromReadOnly = 0,
+ // The following two states together represent S2 in the paper.
+ // The 'NoReadWrite' suffix indicates that there exists an alias path that
+  // contains neither assignment nor reverse assignment edges.
+ // The 'ReadOnly' suffix indicates that there exists an alias path that
+ // contains reverse assignment edges only.
+ FlowFromMemAliasNoReadWrite,
+ FlowFromMemAliasReadOnly,
+ // The following two states together represent S3 in the paper.
+ // The 'WriteOnly' suffix indicates that there exists an alias path that
+ // contains assignment edges only.
+ // The 'ReadWrite' suffix indicates that there exists an alias path that
+ // contains both assignment and reverse assignment edges. Note that if X and Y
+  // are reachable at 'ReadWrite' state, it does NOT mean that X is both read
+  // from and written into Y. Instead, it means that some third value Z is
+  // written into both X and Y.
+ FlowToWriteOnly,
+ FlowToReadWrite,
+ // The following two states together represent S4 in the paper.
+ FlowToMemAliasWriteOnly,
+ FlowToMemAliasReadWrite,
};
+typedef std::bitset<7> StateSet;
+const unsigned ReadOnlyStateMask =
+ (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) |
+ (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly));
+const unsigned WriteOnlyStateMask =
+ (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) |
+ (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly));
+
+// A pair that consists of a value and an offset
+struct OffsetValue {
+ const Value *Val;
+ int64_t Offset;
+};
+
+bool operator==(OffsetValue LHS, OffsetValue RHS) {
+ return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset;
+}
+bool operator<(OffsetValue LHS, OffsetValue RHS) {
+ return std::less<const Value *>()(LHS.Val, RHS.Val) ||
+ (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset);
+}
+
+// A pair that consists of an InstantiatedValue and an offset
+struct OffsetInstantiatedValue {
+ InstantiatedValue IVal;
+ int64_t Offset;
+};
+
+bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) {
+ return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset;
+}
+
// We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in
// the paper) during the analysis.
class ReachabilitySet {
- typedef std::bitset<4> StateSet;
typedef DenseMap<InstantiatedValue, StateSet> ValueStateMap;
typedef DenseMap<InstantiatedValue, ValueStateMap> ValueReachMap;
ValueReachMap ReachMap;
@@ -91,6 +151,7 @@ public:
// Insert edge 'From->To' at state 'State'
bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) {
+ assert(From != To);
auto &States = ReachMap[To][From];
auto Idx = static_cast<size_t>(State);
if (!States.test(Idx)) {
@@ -150,8 +211,6 @@ public:
typedef MapType::const_iterator const_iterator;
bool add(InstantiatedValue V, AliasAttrs Attr) {
- if (Attr.none())
- return false;
auto &OldAttr = AttrMap[V];
auto NewAttr = OldAttr | Attr;
if (OldAttr == NewAttr)
@@ -178,6 +237,57 @@ struct WorkListItem {
InstantiatedValue To;
MatchState State;
};
+
+struct ValueSummary {
+ struct Record {
+ InterfaceValue IValue;
+ unsigned DerefLevel;
+ };
+ SmallVector<Record, 4> FromRecords, ToRecords;
+};
+}
+
+namespace llvm {
+// Specialize DenseMapInfo for OffsetValue.
+template <> struct DenseMapInfo<OffsetValue> {
+ static OffsetValue getEmptyKey() {
+ return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static OffsetValue getTombstoneKey() {
+ return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static unsigned getHashValue(const OffsetValue &OVal) {
+ return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue(
+ std::make_pair(OVal.Val, OVal.Offset));
+ }
+ static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Specialize DenseMapInfo for OffsetInstantiatedValue.
+template <> struct DenseMapInfo<OffsetInstantiatedValue> {
+ static OffsetInstantiatedValue getEmptyKey() {
+ return OffsetInstantiatedValue{
+ DenseMapInfo<InstantiatedValue>::getEmptyKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static OffsetInstantiatedValue getTombstoneKey() {
+ return OffsetInstantiatedValue{
+ DenseMapInfo<InstantiatedValue>::getTombstoneKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static unsigned getHashValue(const OffsetInstantiatedValue &OVal) {
+ return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue(
+ std::make_pair(OVal.IVal, OVal.Offset));
+ }
+ static bool isEqual(const OffsetInstantiatedValue &LHS,
+ const OffsetInstantiatedValue &RHS) {
+ return LHS == RHS;
+ }
+};
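+
+// With the specializations above, both types can be used directly as DenseMap
+// keys, e.g. (illustrative only): DenseMap<OffsetValue, unsigned> UseCounts;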
}
class CFLAndersAAResult::FunctionInfo {
@@ -185,7 +295,7 @@ class CFLAndersAAResult::FunctionInfo {
/// Since the alias relation is symmetric, to save some space we assume values
/// are properly ordered: if a and b alias each other, and a < b, then b is in
/// AliasMap[a] but not vice versa.
- DenseMap<const Value *, std::vector<const Value *>> AliasMap;
+ DenseMap<const Value *, std::vector<OffsetValue>> AliasMap;
/// Map a value to its corresponding AliasAttrs
DenseMap<const Value *, AliasAttrs> AttrMap;
@@ -193,27 +303,56 @@ class CFLAndersAAResult::FunctionInfo {
/// Summary of externally visible effects.
AliasSummary Summary;
- AliasAttrs getAttrs(const Value *) const;
+ Optional<AliasAttrs> getAttrs(const Value *) const;
public:
- FunctionInfo(const ReachabilitySet &, AliasAttrMap);
+ FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
+ const ReachabilitySet &, AliasAttrMap);
- bool mayAlias(const Value *LHS, const Value *RHS) const;
+ bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const;
const AliasSummary &getAliasSummary() const { return Summary; }
};
-CFLAndersAAResult::FunctionInfo::FunctionInfo(const ReachabilitySet &ReachSet,
- AliasAttrMap AMap) {
- // Populate AttrMap
+static bool hasReadOnlyState(StateSet Set) {
+ return (Set & StateSet(ReadOnlyStateMask)).any();
+}
+
+static bool hasWriteOnlyState(StateSet Set) {
+ return (Set & StateSet(WriteOnlyStateMask)).any();
+}
+
+static Optional<InterfaceValue>
+getInterfaceValue(InstantiatedValue IValue,
+ const SmallVectorImpl<Value *> &RetVals) {
+ auto Val = IValue.Val;
+
+ Optional<unsigned> Index;
+ if (auto Arg = dyn_cast<Argument>(Val))
+ Index = Arg->getArgNo() + 1;
+ else if (is_contained(RetVals, Val))
+ Index = 0;
+
+ if (Index)
+ return InterfaceValue{*Index, IValue.DerefLevel};
+ return None;
+}
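+
+// Index convention sketch (hypothetical signature): given
+// "int *f(int *A, int *B)", the return value maps to interface index 0, A to
+// index 1, and B to index 2, with DerefLevel carried over unchanged.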
+
+static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap,
+ const AliasAttrMap &AMap) {
for (const auto &Mapping : AMap.mappings()) {
auto IVal = Mapping.first;
+ // Insert IVal into the map
+ auto &Attr = AttrMap[IVal.Val];
// AttrMap only cares about top-level values
if (IVal.DerefLevel == 0)
- AttrMap[IVal.Val] = Mapping.second;
+ Attr |= Mapping.second;
}
+}
- // Populate AliasMap
+static void
+populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
+ const ReachabilitySet &ReachSet) {
for (const auto &OuterMapping : ReachSet.value_mappings()) {
// AliasMap only cares about top-level values
if (OuterMapping.first.DerefLevel > 0)
@@ -224,48 +363,202 @@ CFLAndersAAResult::FunctionInfo::FunctionInfo(const ReachabilitySet &ReachSet,
for (const auto &InnerMapping : OuterMapping.second) {
// Again, AliasMap only cares about top-level values
if (InnerMapping.first.DerefLevel == 0)
- AliasList.push_back(InnerMapping.first.Val);
+ AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset});
}
// Sort AliasList for faster lookup
- std::sort(AliasList.begin(), AliasList.end(), std::less<const Value *>());
+ std::sort(AliasList.begin(), AliasList.end());
}
+}
- // TODO: Populate function summary here
+static void populateExternalRelations(
+ SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn,
+ const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) {
+ // If a function only returns one of its argument X, then X will be both an
+ // argument and a return value at the same time. This is an edge case that
+ // needs special handling here.
+ for (const auto &Arg : Fn.args()) {
+ if (is_contained(RetVals, &Arg)) {
+ auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0};
+ auto RetVal = InterfaceValue{0, 0};
+ ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0});
+ }
+ }
+
+ // Below is the core summary construction logic.
+ // A naive solution of adding only the value aliases that are parameters or
+ // return values in ReachSet to the summary won't work: It is possible that a
+ // parameter P is written into an intermediate value I, and the function
+  // subsequently returns *I. In that case, *I does not value alias anything
+ // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to
+ // (I, 1).
+ // To account for the aforementioned case, we need to check each non-parameter
+ // and non-return value for the possibility of acting as an intermediate.
+ // 'ValueMap' here records, for each value, which InterfaceValues read from or
+ // write into it. If both the read list and the write list of a given value
+  // are non-empty, we know that the value is an intermediate and we
+ // need to add summary edges from the writes to the reads.
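+  //
+  // A hedged sketch of the intermediate case (names invented for exposition):
+  //   int *g(int *P) {
+  //     int **I = ...;  // some local value, neither parameter nor return
+  //     *I = P;         // P is written into *I
+  //     return *I;      // ...and *I flows out through the return value
+  //   }
+  // The summary must still relate (P, 1) to the return value even though *I
+  // itself never value-aliases a parameter or return value.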
+ DenseMap<Value *, ValueSummary> ValueMap;
+ for (const auto &OuterMapping : ReachSet.value_mappings()) {
+ if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) {
+ for (const auto &InnerMapping : OuterMapping.second) {
+ // If Src is a param/return value, we get a same-level assignment.
+ if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) {
+ // This may happen if both Dst and Src are return values
+ if (*Dst == *Src)
+ continue;
+
+ if (hasReadOnlyState(InnerMapping.second))
+ ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset});
+ // No need to check for WriteOnly state, since ReachSet is symmetric
+ } else {
+ // If Src is not a param/return, add it to ValueMap
+ auto SrcIVal = InnerMapping.first;
+ if (hasReadOnlyState(InnerMapping.second))
+ ValueMap[SrcIVal.Val].FromRecords.push_back(
+ ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
+ if (hasWriteOnlyState(InnerMapping.second))
+ ValueMap[SrcIVal.Val].ToRecords.push_back(
+ ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
+ }
+ }
+ }
+ }
+
+ for (const auto &Mapping : ValueMap) {
+ for (const auto &FromRecord : Mapping.second.FromRecords) {
+ for (const auto &ToRecord : Mapping.second.ToRecords) {
+ auto ToLevel = ToRecord.DerefLevel;
+ auto FromLevel = FromRecord.DerefLevel;
+ // Same-level assignments should have already been processed by now
+ if (ToLevel == FromLevel)
+ continue;
+
+ auto SrcIndex = FromRecord.IValue.Index;
+ auto SrcLevel = FromRecord.IValue.DerefLevel;
+ auto DstIndex = ToRecord.IValue.Index;
+ auto DstLevel = ToRecord.IValue.DerefLevel;
+ if (ToLevel > FromLevel)
+ SrcLevel += ToLevel - FromLevel;
+ else
+ DstLevel += FromLevel - ToLevel;
+
+ ExtRelations.push_back(ExternalRelation{
+ InterfaceValue{SrcIndex, SrcLevel},
+ InterfaceValue{DstIndex, DstLevel}, UnknownOffset});
+ }
+ }
+ }
+
+ // Remove duplicates in ExtRelations
+ std::sort(ExtRelations.begin(), ExtRelations.end());
+ ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
+ ExtRelations.end());
+}
+
+static void populateExternalAttributes(
+ SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn,
+ const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) {
+ for (const auto &Mapping : AMap.mappings()) {
+ if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) {
+ auto Attr = getExternallyVisibleAttrs(Mapping.second);
+ if (Attr.any())
+ ExtAttributes.push_back(ExternalAttribute{*IVal, Attr});
+ }
+ }
+}
+
+CFLAndersAAResult::FunctionInfo::FunctionInfo(
+ const Function &Fn, const SmallVectorImpl<Value *> &RetVals,
+ const ReachabilitySet &ReachSet, AliasAttrMap AMap) {
+ populateAttrMap(AttrMap, AMap);
+ populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap);
+ populateAliasMap(AliasMap, ReachSet);
+ populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet);
}
-AliasAttrs CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
+Optional<AliasAttrs>
+CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
assert(V != nullptr);
- AliasAttrs Attr;
auto Itr = AttrMap.find(V);
if (Itr != AttrMap.end())
- Attr = Itr->second;
- return Attr;
+ return Itr->second;
+ return None;
}
bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
- const Value *RHS) const {
+ uint64_t LHSSize,
+ const Value *RHS,
+ uint64_t RHSSize) const {
assert(LHS && RHS);
+ // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
+ // after the analysis gets executed, and we want to be conservative in those
+ // cases.
+ auto MaybeAttrsA = getAttrs(LHS);
+ auto MaybeAttrsB = getAttrs(RHS);
+ if (!MaybeAttrsA || !MaybeAttrsB)
+ return true;
+
+ // Check AliasAttrs before AliasMap lookup since it's cheaper
+ auto AttrsA = *MaybeAttrsA;
+ auto AttrsB = *MaybeAttrsB;
+ if (hasUnknownOrCallerAttr(AttrsA))
+ return AttrsB.any();
+ if (hasUnknownOrCallerAttr(AttrsB))
+ return AttrsA.any();
+ if (isGlobalOrArgAttr(AttrsA))
+ return isGlobalOrArgAttr(AttrsB);
+ if (isGlobalOrArgAttr(AttrsB))
+ return isGlobalOrArgAttr(AttrsA);
+
+ // At this point both LHS and RHS should point to locally allocated objects
+
auto Itr = AliasMap.find(LHS);
if (Itr != AliasMap.end()) {
- if (std::binary_search(Itr->second.begin(), Itr->second.end(), RHS,
- std::less<const Value *>()))
- return true;
- }
- // Even if LHS and RHS are not reachable, they may still alias due to their
- // AliasAttrs
- auto AttrsA = getAttrs(LHS);
- auto AttrsB = getAttrs(RHS);
+ // Find out all (X, Offset) where X == RHS
+ auto Comparator = [](OffsetValue LHS, OffsetValue RHS) {
+ return std::less<const Value *>()(LHS.Val, RHS.Val);
+ };
+#ifdef EXPENSIVE_CHECKS
+ assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator));
+#endif
+ auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(),
+ OffsetValue{RHS, 0}, Comparator);
+
+ if (RangePair.first != RangePair.second) {
+ // Be conservative about UnknownSize
+ if (LHSSize == MemoryLocation::UnknownSize ||
+ RHSSize == MemoryLocation::UnknownSize)
+ return true;
+
+ for (const auto &OVal : make_range(RangePair)) {
+ // Be conservative about UnknownOffset
+ if (OVal.Offset == UnknownOffset)
+ return true;
+
+ // We know that LHS aliases (RHS + OVal.Offset) if the control flow
+ // reaches here. The may-alias query essentially becomes integer
+ // range-overlap queries over two ranges [OVal.Offset, OVal.Offset +
+ // LHSSize) and [0, RHSSize).
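+      //
+      // Worked example with illustrative numbers: if OVal.Offset == 4 and
+      // LHSSize == RHSSize == 8, the two ranges are [4, 12) and [0, 8);
+      // they overlap, so we conservatively answer may-alias.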
+
+ // Try to be conservative on super large offsets
+ if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX))
+ return true;
+
+ auto LHSStart = OVal.Offset;
+ // FIXME: Do we need to guard against integer overflow?
+ auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize);
+ auto RHSStart = 0;
+ auto RHSEnd = static_cast<int64_t>(RHSSize);
+ if (LHSEnd > RHSStart && LHSStart < RHSEnd)
+ return true;
+ }
+ }
+ }
- if (AttrsA.none() || AttrsB.none())
- return false;
- if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
- return true;
- if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
- return true;
return false;
}
@@ -292,8 +585,10 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList,
// If there's an assignment edge from X to Y, it means Y is reachable from
// X at S2 and X is reachable from Y at S1
for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
- propagate(Edge.Other, Src, MatchState::FlowFrom, ReachSet, WorkList);
- propagate(Src, Edge.Other, MatchState::FlowTo, ReachSet, WorkList);
+ propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
+ WorkList);
+ propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet,
+ WorkList);
}
}
}
@@ -328,16 +623,21 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
auto ToNodeBelow = getNodeBelow(Graph, ToNode);
if (FromNodeBelow && ToNodeBelow &&
MemSet.insert(*FromNodeBelow, *ToNodeBelow)) {
- propagate(*FromNodeBelow, *ToNodeBelow, MatchState::FlowFromMemAlias,
- ReachSet, WorkList);
+ propagate(*FromNodeBelow, *ToNodeBelow,
+ MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList);
for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) {
auto Src = Mapping.first;
- if (Mapping.second.test(static_cast<size_t>(MatchState::FlowFrom)))
- propagate(Src, *ToNodeBelow, MatchState::FlowFromMemAlias, ReachSet,
- WorkList);
- if (Mapping.second.test(static_cast<size_t>(MatchState::FlowTo)))
- propagate(Src, *ToNodeBelow, MatchState::FlowToMemAlias, ReachSet,
- WorkList);
+ auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) {
+ if (Mapping.second.test(static_cast<size_t>(FromState)))
+ propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList);
+ };
+
+ MemAliasPropagate(MatchState::FlowFromReadOnly,
+ MatchState::FlowFromMemAliasReadOnly);
+ MemAliasPropagate(MatchState::FlowToWriteOnly,
+ MatchState::FlowToMemAliasWriteOnly);
+ MemAliasPropagate(MatchState::FlowToReadWrite,
+ MatchState::FlowToMemAliasReadWrite);
}
}
@@ -349,45 +649,54 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
// - If *X and *Y are memory aliases, then X and Y are value aliases
// - If Y is an alias of X, then reverse assignment edges (if there is any)
// should precede any assignment edges on the path from X to Y.
- switch (Item.State) {
- case MatchState::FlowFrom: {
- for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
- propagate(FromNode, RevAssignEdge.Other, MatchState::FlowFrom, ReachSet,
- WorkList);
+ auto NextAssignState = [&](MatchState State) {
for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
+ propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList);
+ };
+ auto NextRevAssignState = [&](MatchState State) {
+ for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
+ propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList);
+ };
+ auto NextMemState = [&](MatchState State) {
if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) {
for (const auto &MemAlias : *AliasSet)
- propagate(FromNode, MemAlias, MatchState::FlowFromMemAlias, ReachSet,
- WorkList);
+ propagate(FromNode, MemAlias, State, ReachSet, WorkList);
}
+ };
+
+ switch (Item.State) {
+ case MatchState::FlowFromReadOnly: {
+ NextRevAssignState(MatchState::FlowFromReadOnly);
+ NextAssignState(MatchState::FlowToReadWrite);
+ NextMemState(MatchState::FlowFromMemAliasReadOnly);
break;
}
- case MatchState::FlowFromMemAlias: {
- for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
- propagate(FromNode, RevAssignEdge.Other, MatchState::FlowFrom, ReachSet,
- WorkList);
- for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
+ case MatchState::FlowFromMemAliasNoReadWrite: {
+ NextRevAssignState(MatchState::FlowFromReadOnly);
+ NextAssignState(MatchState::FlowToWriteOnly);
break;
}
- case MatchState::FlowTo: {
- for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
- if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) {
- for (const auto &MemAlias : *AliasSet)
- propagate(FromNode, MemAlias, MatchState::FlowToMemAlias, ReachSet,
- WorkList);
- }
+ case MatchState::FlowFromMemAliasReadOnly: {
+ NextRevAssignState(MatchState::FlowFromReadOnly);
+ NextAssignState(MatchState::FlowToReadWrite);
break;
}
- case MatchState::FlowToMemAlias: {
- for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
+ case MatchState::FlowToWriteOnly: {
+ NextAssignState(MatchState::FlowToWriteOnly);
+ NextMemState(MatchState::FlowToMemAliasWriteOnly);
+ break;
+ }
+ case MatchState::FlowToReadWrite: {
+ NextAssignState(MatchState::FlowToReadWrite);
+ NextMemState(MatchState::FlowToMemAliasReadWrite);
+ break;
+ }
+ case MatchState::FlowToMemAliasWriteOnly: {
+ NextAssignState(MatchState::FlowToWriteOnly);
+ break;
+ }
+ case MatchState::FlowToMemAliasReadWrite: {
+ NextAssignState(MatchState::FlowToReadWrite);
break;
}
}
@@ -465,7 +774,8 @@ CFLAndersAAResult::buildInfoFrom(const Function &Fn) {
// to it
auto IValueAttrMap = buildAttrMap(Graph, ReachSet);
- return FunctionInfo(ReachSet, std::move(IValueAttrMap));
+ return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet,
+ std::move(IValueAttrMap));
}
void CFLAndersAAResult::scan(const Function &Fn) {
@@ -530,7 +840,7 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
auto &FunInfo = ensureCached(*Fn);
// AliasMap lookup
- if (FunInfo->mayAlias(ValA, ValB))
+ if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size))
return MayAlias;
return NoAlias;
}
@@ -555,9 +865,9 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
return QueryResult;
}
-char CFLAndersAA::PassID;
+AnalysisKey CFLAndersAA::Key;
-CFLAndersAAResult CFLAndersAA::run(Function &F, AnalysisManager<Function> &AM) {
+CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) {
return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F));
}
diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h
index bc6e794d0b2a..e526e0e16aa7 100644
--- a/lib/Analysis/CFLGraph.h
+++ b/lib/Analysis/CFLGraph.h
@@ -40,6 +40,7 @@ public:
struct Edge {
Node Other;
+ int64_t Offset;
};
typedef std::vector<Edge> EdgeList;
@@ -107,8 +108,8 @@ public:
auto *ToInfo = getNode(To);
assert(ToInfo != nullptr);
- FromInfo->Edges.push_back(Edge{To});
- ToInfo->ReverseEdges.push_back(Edge{From});
+ FromInfo->Edges.push_back(Edge{To, Offset});
+ ToInfo->ReverseEdges.push_back(Edge{From, Offset});
}
const NodeInfo *getNode(Node N) const {
@@ -151,6 +152,7 @@ template <typename CFLAA> class CFLGraphBuilder {
/// Gets the edges our graph should have, based on an Instruction*
class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
CFLAA &AA;
+ const DataLayout &DL;
const TargetLibraryInfo &TLI;
CFLGraph &Graph;
@@ -225,8 +227,8 @@ template <typename CFLAA> class CFLGraphBuilder {
void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); }
public:
- GetEdgesVisitor(CFLGraphBuilder &Builder)
- : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
+ GetEdgesVisitor(CFLGraphBuilder &Builder, const DataLayout &DL)
+ : AA(Builder.Analysis), DL(DL), TLI(Builder.TLI), Graph(Builder.Graph),
ReturnValues(Builder.ReturnedValues) {}
void visitInstruction(Instruction &) {
@@ -281,9 +283,20 @@ template <typename CFLAA> class CFLGraphBuilder {
addAssignEdge(Val, &Inst);
}
+ void visitGEP(GEPOperator &GEPOp) {
+    int64_t Offset = UnknownOffset;
+ APInt APOffset(DL.getPointerSizeInBits(GEPOp.getPointerAddressSpace()),
+ 0);
+ if (GEPOp.accumulateConstantOffset(DL, APOffset))
+ Offset = APOffset.getSExtValue();
+
+ auto *Op = GEPOp.getPointerOperand();
+ addAssignEdge(Op, &GEPOp, Offset);
+ }
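+  // For example (hypothetical IR, assuming i32 is 4 bytes in the module's
+  // DataLayout):
+  //   %q = getelementptr i32, i32* %p, i64 2
+  // accumulates a constant offset of 8, so the edge from %p to %q carries
+  // Offset = 8; GEPs with non-constant indices fall back to UnknownOffset.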
+
void visitGetElementPtrInst(GetElementPtrInst &Inst) {
- auto *Op = Inst.getPointerOperand();
- addAssignEdge(Op, &Inst);
+ auto *GEPOp = cast<GEPOperator>(&Inst);
+ visitGEP(*GEPOp);
}
void visitSelectInst(SelectInst &Inst) {
@@ -321,7 +334,8 @@ template <typename CFLAA> class CFLGraphBuilder {
// For now, we'll handle this like a landingpad instruction (by placing
// the
// result in its own group, and having that group alias externals).
- addNode(&Inst, getAttrUnknown());
+ if (Inst.getType()->isPointerTy())
+ addNode(&Inst, getAttrUnknown());
}
static bool isFunctionExternal(Function *Fn) {
@@ -444,7 +458,8 @@ template <typename CFLAA> class CFLGraphBuilder {
// Exceptions come from "nowhere", from our analysis' perspective.
      // So we place the instruction in its own group, noting that said group may
// alias externals
- addNode(&Inst, getAttrUnknown());
+ if (Inst.getType()->isPointerTy())
+ addNode(&Inst, getAttrUnknown());
}
void visitInsertValueInst(InsertValueInst &Inst) {
@@ -468,14 +483,97 @@ template <typename CFLAA> class CFLGraphBuilder {
void visitConstantExpr(ConstantExpr *CE) {
switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ auto GEPOp = cast<GEPOperator>(CE);
+ visitGEP(*GEPOp);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ auto *Ptr = CE->getOperand(0);
+ addNode(Ptr, getAttrEscaped());
+ break;
+ }
+ case Instruction::IntToPtr: {
+ addNode(CE, getAttrUnknown());
+ break;
+ }
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI: {
+ auto *Src = CE->getOperand(0);
+ addAssignEdge(Src, CE);
+ break;
+ }
+ case Instruction::Select: {
+ auto *TrueVal = CE->getOperand(0);
+ auto *FalseVal = CE->getOperand(1);
+ addAssignEdge(TrueVal, CE);
+ addAssignEdge(FalseVal, CE);
+ break;
+ }
+ case Instruction::InsertElement: {
+ auto *Vec = CE->getOperand(0);
+ auto *Val = CE->getOperand(1);
+ addAssignEdge(Vec, CE);
+ addStoreEdge(Val, CE);
+ break;
+ }
+ case Instruction::ExtractElement: {
+ auto *Ptr = CE->getOperand(0);
+ addLoadEdge(Ptr, CE);
+ break;
+ }
+ case Instruction::InsertValue: {
+ auto *Agg = CE->getOperand(0);
+ auto *Val = CE->getOperand(1);
+ addAssignEdge(Agg, CE);
+ addStoreEdge(Val, CE);
+ break;
+ }
+ case Instruction::ExtractValue: {
+ auto *Ptr = CE->getOperand(0);
+ addLoadEdge(Ptr, CE);
+    break;
+  }
+ case Instruction::ShuffleVector: {
+ auto *From1 = CE->getOperand(0);
+ auto *From2 = CE->getOperand(1);
+ addAssignEdge(From1, CE);
+ addAssignEdge(From2, CE);
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ addAssignEdge(CE->getOperand(0), CE);
+ addAssignEdge(CE->getOperand(1), CE);
+ break;
+ }
default:
llvm_unreachable("Unknown instruction type encountered!");
-// Build the switch statement using the Instruction.def file.
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: \
- this->visit##OPCODE(*(CLASS *)CE); \
- break;
-#include "llvm/IR/Instruction.def"
}
}
};
@@ -517,7 +615,7 @@ template <typename CFLAA> class CFLGraphBuilder {
// Builds the graph needed for constructing the StratifiedSets for the given
// function
void buildGraphFrom(Function &Fn) {
- GetEdgesVisitor Visitor(*this);
+ GetEdgesVisitor Visitor(*this, Fn.getParent()->getDataLayout());
for (auto &Bb : Fn.getBasicBlockList())
for (auto &Inst : Bb.getInstList())
diff --git a/lib/Analysis/CFLSteensAliasAnalysis.cpp b/lib/Analysis/CFLSteensAliasAnalysis.cpp
index d816822aaaea..dde24ef5fdd5 100644
--- a/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -153,7 +153,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo(
if (Itr != InterfaceMap.end()) {
if (CurrValue != Itr->second)
Summary.RetParamRelations.push_back(
- ExternalRelation{CurrValue, Itr->second});
+ ExternalRelation{CurrValue, Itr->second, UnknownOffset});
break;
}
@@ -341,81 +341,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
return NoAlias;
}
-ModRefInfo CFLSteensAAResult::getArgModRefInfo(ImmutableCallSite CS,
- unsigned ArgIdx) {
- if (auto CalledFunc = CS.getCalledFunction()) {
- auto &MaybeInfo = ensureCached(const_cast<Function *>(CalledFunc));
- if (!MaybeInfo.hasValue())
- return MRI_ModRef;
- auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
- auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
-
- bool ArgAttributeIsWritten =
- std::any_of(RetParamAttributes.begin(), RetParamAttributes.end(),
- [ArgIdx](const ExternalAttribute &ExtAttr) {
- return ExtAttr.IValue.Index == ArgIdx + 1;
- });
- bool ArgIsAccessed =
- std::any_of(RetParamRelations.begin(), RetParamRelations.end(),
- [ArgIdx](const ExternalRelation &ExtRelation) {
- return ExtRelation.To.Index == ArgIdx + 1 ||
- ExtRelation.From.Index == ArgIdx + 1;
- });
-
- return (!ArgIsAccessed && !ArgAttributeIsWritten) ? MRI_NoModRef
- : MRI_ModRef;
- }
-
- return MRI_ModRef;
-}
-
-FunctionModRefBehavior
-CFLSteensAAResult::getModRefBehavior(ImmutableCallSite CS) {
- // If we know the callee, try analyzing it
- if (auto CalledFunc = CS.getCalledFunction())
- return getModRefBehavior(CalledFunc);
-
- // Otherwise, be conservative
- return FMRB_UnknownModRefBehavior;
-}
-
-FunctionModRefBehavior CFLSteensAAResult::getModRefBehavior(const Function *F) {
- assert(F != nullptr);
-
- // TODO: Remove the const_cast
- auto &MaybeInfo = ensureCached(const_cast<Function *>(F));
- if (!MaybeInfo.hasValue())
- return FMRB_UnknownModRefBehavior;
- auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
- auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
-
- // First, if any argument is marked Escpaed, Unknown or Global, anything may
- // happen to them and thus we can't draw any conclusion.
- if (!RetParamAttributes.empty())
- return FMRB_UnknownModRefBehavior;
-
- // Currently we don't (and can't) distinguish reads from writes in
- // RetParamRelations. All we can say is whether there may be memory access or
- // not.
- if (RetParamRelations.empty())
- return FMRB_DoesNotAccessMemory;
-
- // Check if something beyond argmem gets touched.
- bool AccessArgMemoryOnly =
- std::all_of(RetParamRelations.begin(), RetParamRelations.end(),
- [](const ExternalRelation &ExtRelation) {
- // Both DerefLevels has to be 0, since we don't know which
- // one is a read and which is a write.
- return ExtRelation.From.DerefLevel == 0 &&
- ExtRelation.To.DerefLevel == 0;
- });
- return AccessArgMemoryOnly ? FMRB_OnlyAccessesArgumentPointees
- : FMRB_UnknownModRefBehavior;
-}
-
-char CFLSteensAA::PassID;
+AnalysisKey CFLSteensAA::Key;
-CFLSteensAAResult CFLSteensAA::run(Function &F, AnalysisManager<Function> &AM) {
+CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F));
}
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index f6f30bb927a5..054bdc45ad67 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -8,17 +8,506 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
using namespace llvm;
-// Explicit instantiations for the core proxy templates.
+// Explicit template instantiations and specialization definitions for core
+// template typedefs.
namespace llvm {
-template class PassManager<LazyCallGraph::SCC>;
-template class AnalysisManager<LazyCallGraph::SCC>;
+
+// Explicit instantiations for the core proxy templates.
+template class AllAnalysesOn<LazyCallGraph::SCC>;
+template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
+template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager,
+ LazyCallGraph &, CGSCCUpdateResult &>;
template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>;
template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
- LazyCallGraph::SCC>;
-template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
- LazyCallGraph::SCC>;
+ LazyCallGraph::SCC, LazyCallGraph &>;
template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>;
+
+/// Explicitly specialize the pass manager run method to handle call graph
+/// updates.
+template <>
+PreservedAnalyses
+PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
+ CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &G, CGSCCUpdateResult &UR) {
+ PreservedAnalyses PA = PreservedAnalyses::all();
+
+ if (DebugLogging)
+ dbgs() << "Starting CGSCC pass manager run.\n";
+
+ // The SCC may be refined while we are running passes over it, so set up
+ // a pointer that we can update.
+ LazyCallGraph::SCC *C = &InitialC;
+
+ for (auto &Pass : Passes) {
+ if (DebugLogging)
+ dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";
+
+ PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);
+
+ // Update the SCC if necessary.
+ C = UR.UpdatedC ? UR.UpdatedC : C;
+
+ // Check that we didn't miss any update scenario.
+ assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+
+ // Update the analysis manager as each pass runs and potentially
+ // invalidates analyses.
+ AM.invalidate(*C, PassPA);
+
+ // Finally, we intersect the final preserved analyses to compute the
+ // aggregate preserved set for this pass manager.
+ PA.intersect(std::move(PassPA));
+
+ // FIXME: Historically, the pass managers all called the LLVM context's
+ // yield function here. We don't have a generic way to acquire the
+ // context and it isn't yet clear what the right pattern is for yielding
+ // in the new pass manager so it is currently omitted.
+ // ...getContext().yield();
+ }
+
+  // Invalidation was handled after each pass in the above loop for the current
+ // SCC. Therefore, the remaining analysis results in the AnalysisManager are
+ // preserved. We mark this with a set so that we don't need to inspect each
+ // one individually.
+ PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
+
+ if (DebugLogging)
+ dbgs() << "Finished CGSCC pass manager run.\n";
+
+ return PA;
+}
+
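+// A minimal sketch of driving this specialization (assumes the usual new-PM
+// setup; 'SomeCGSCCPass' is an illustrative placeholder):
+//   CGSCCPassManager CGPM(/*DebugLogging=*/true);
+//   CGPM.addPass(SomeCGSCCPass());
+//   ModulePassManager MPM;
+//   MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+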
+bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
+ Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &Inv) {
+ // If literally everything is preserved, we're done.
+ if (PA.areAllPreserved())
+ return false; // This is still a valid proxy.
+
+ // If this proxy or the call graph is going to be invalidated, we also need
+ // to clear all the keys coming from that analysis.
+ //
+ // We also directly invalidate the FAM's module proxy if necessary, and if
+ // that proxy isn't preserved we can't preserve this proxy either. We rely on
+ // it to handle module -> function analysis invalidation in the face of
+ // structural changes and so if it's unavailable we conservatively clear the
+ // entire SCC layer as well rather than trying to do invalidation ourselves.
+ auto PAC = PA.getChecker<CGSCCAnalysisManagerModuleProxy>();
+ if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>()) ||
+ Inv.invalidate<LazyCallGraphAnalysis>(M, PA) ||
+ Inv.invalidate<FunctionAnalysisManagerModuleProxy>(M, PA)) {
+ InnerAM->clear();
+
+ // And the proxy itself should be marked as invalid so that we can observe
+ // the new call graph. This isn't strictly necessary because we cheat
+ // above, but is still useful.
+ return true;
+ }
+
+ // Directly check if the relevant set is preserved so we can short circuit
+ // invalidating SCCs below.
+ bool AreSCCAnalysesPreserved =
+ PA.allAnalysesInSetPreserved<AllAnalysesOn<LazyCallGraph::SCC>>();
+
+ // Ok, we have a graph, so we can propagate the invalidation down into it.
+ for (auto &RC : G->postorder_ref_sccs())
+ for (auto &C : RC) {
+ Optional<PreservedAnalyses> InnerPA;
+
+ // Check to see whether the preserved set needs to be adjusted based on
+ // module-level analysis invalidation triggering deferred invalidation
+ // for this SCC.
+ if (auto *OuterProxy =
+ InnerAM->getCachedResult<ModuleAnalysisManagerCGSCCProxy>(C))
+ for (const auto &OuterInvalidationPair :
+ OuterProxy->getOuterInvalidations()) {
+ AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first;
+ const auto &InnerAnalysisIDs = OuterInvalidationPair.second;
+ if (Inv.invalidate(OuterAnalysisID, M, PA)) {
+ if (!InnerPA)
+ InnerPA = PA;
+ for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs)
+ InnerPA->abandon(InnerAnalysisID);
+ }
+ }
+
+ // Check if we needed a custom PA set. If so we'll need to run the inner
+ // invalidation.
+ if (InnerPA) {
+ InnerAM->invalidate(C, *InnerPA);
+ continue;
+ }
+
+ // Otherwise we only need to do invalidation if the original PA set didn't
+ // preserve all SCC analyses.
+ if (!AreSCCAnalysesPreserved)
+ InnerAM->invalidate(C, PA);
+ }
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
+
+template <>
+CGSCCAnalysisManagerModuleProxy::Result
+CGSCCAnalysisManagerModuleProxy::run(Module &M, ModuleAnalysisManager &AM) {
+ // Force the Function analysis manager to also be available so that it can
+ // be accessed in an SCC analysis and proxied onward to function passes.
+ // FIXME: It is pretty awkward to just drop the result here and assert that
+ // we can find it again later.
+ (void)AM.getResult<FunctionAnalysisManagerModuleProxy>(M);
+
+ return Result(*InnerAM, AM.getResult<LazyCallGraphAnalysis>(M));
+}
+
+AnalysisKey FunctionAnalysisManagerCGSCCProxy::Key;
+
+FunctionAnalysisManagerCGSCCProxy::Result
+FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG) {
+ // Collect the FunctionAnalysisManager from the Module layer and use that to
+ // build the proxy result.
+ //
+  // This allows us to rely on the FunctionAnalysisManagerModuleProxy to
+ // invalidate the function analyses.
+ auto &MAM = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ Module &M = *C.begin()->getFunction().getParent();
+ auto *FAMProxy = MAM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M);
+ assert(FAMProxy && "The CGSCC pass manager requires that the FAM module "
+ "proxy is run on the module prior to entering the CGSCC "
+ "walk.");
+
+ // Note that we special-case invalidation handling of this proxy in the CGSCC
+ // analysis manager's Module proxy. This avoids the need to do anything
+ // special here to recompute all of this if ever the FAM's module proxy goes
+ // away.
+ return Result(FAMProxy->getManager());
+}
+
+bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
+ LazyCallGraph::SCC &C, const PreservedAnalyses &PA,
+ CGSCCAnalysisManager::Invalidator &Inv) {
+ for (LazyCallGraph::Node &N : C)
+ FAM->invalidate(N.getFunction(), PA);
+
+ // This proxy doesn't need to handle invalidation itself. Instead, the
+ // module-level CGSCC proxy handles it above by ensuring that if the
+ // module-level FAM proxy becomes invalid the entire SCC layer, which
+ // includes this proxy, is cleared.
+ return false;
+}
+
+} // End llvm namespace
+
+namespace {
+/// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c
+/// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly
+/// added SCCs.
+///
+/// The range of new SCCs must be in postorder already. The SCC they were split
+/// out of must be provided as \p C. The current node being mutated and
+/// triggering updates must be passed as \p N.
+///
+/// This function returns the SCC containing \p N. This will be either \p C if
+/// no new SCCs have been split out, or it will be the new SCC containing \p N.
+template <typename SCCRangeT>
+LazyCallGraph::SCC *
+incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
+ LazyCallGraph::Node &N, LazyCallGraph::SCC *C,
+ CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
+ bool DebugLogging = false) {
+ typedef LazyCallGraph::SCC SCC;
+
+ if (NewSCCRange.begin() == NewSCCRange.end())
+ return C;
+
+ // Add the current SCC to the worklist as its shape has changed.
+ UR.CWorklist.insert(C);
+ if (DebugLogging)
+ dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n";
+
+ SCC *OldC = C;
+ (void)OldC;
+
+ // Update the current SCC. Note that if we have new SCCs, this must actually
+ // change the SCC.
+ assert(C != &*NewSCCRange.begin() &&
+ "Cannot insert new SCCs without changing current SCC!");
+ C = &*NewSCCRange.begin();
+ assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+
+ for (SCC &NewC :
+ reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) {
+ assert(C != &NewC && "No need to re-visit the current SCC!");
+ assert(OldC != &NewC && "Already handled the original SCC!");
+ UR.CWorklist.insert(&NewC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n";
+ }
+ return C;
+}
+}
+
+LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
+ LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N,
+ CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) {
+ typedef LazyCallGraph::Node Node;
+ typedef LazyCallGraph::Edge Edge;
+ typedef LazyCallGraph::SCC SCC;
+ typedef LazyCallGraph::RefSCC RefSCC;
+
+ RefSCC &InitialRC = InitialC.getOuterRefSCC();
+ SCC *C = &InitialC;
+ RefSCC *RC = &InitialRC;
+ Function &F = N.getFunction();
+
+ // Walk the function body and build up the set of retained, promoted, and
+ // demoted edges.
+ SmallVector<Constant *, 16> Worklist;
+ SmallPtrSet<Constant *, 16> Visited;
+ SmallPtrSet<Function *, 16> RetainedEdges;
+ SmallSetVector<Function *, 4> PromotedRefTargets;
+ SmallSetVector<Function *, 4> DemotedCallTargets;
+
+ // First walk the function and handle all called functions. We do this first
+ // because if there is a single call edge, whether there are ref edges is
+ // irrelevant.
+ for (Instruction &I : instructions(F))
+ if (auto CS = CallSite(&I))
+ if (Function *Callee = CS.getCalledFunction())
+ if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
+ const Edge *E = N.lookup(*Callee);
+ // FIXME: We should really handle adding new calls. While it will
+ // make downstream usage more complex, there is no fundamental
+ // limitation and it will allow passes within the CGSCC to be a bit
+ // more flexible in what transforms they can do. Until then, we
+ // verify that new calls haven't been introduced.
+ assert(E && "No function transformations should introduce *new* "
+ "call edges! Any new calls should be modeled as "
+ "promoted existing ref edges!");
+ RetainedEdges.insert(Callee);
+ if (!E->isCall())
+ PromotedRefTargets.insert(Callee);
+ }
+
+ // Now walk all references.
+ for (Instruction &I : instructions(F))
+ for (Value *Op : I.operand_values())
+ if (Constant *C = dyn_cast<Constant>(Op))
+ if (Visited.insert(C).second)
+ Worklist.push_back(C);
+
+ LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) {
+ const Edge *E = N.lookup(Referee);
+ // FIXME: Similarly to new calls, we also currently preclude
+ // introducing new references. See above for details.
+ assert(E && "No function transformations should introduce *new* ref "
+ "edges! Any new ref edges would require IPO which "
+ "function passes aren't allowed to do!");
+ RetainedEdges.insert(&Referee);
+ if (E->isCall())
+ DemotedCallTargets.insert(&Referee);
+ });
+
+ // First remove all of the edges that are no longer present in this function.
+ // We have to build a list of dead targets first and then remove them as the
+ // data structures will all be invalidated by removing them.
+ SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets;
+ for (Edge &E : N)
+ if (!RetainedEdges.count(&E.getFunction()))
+ DeadTargets.push_back({E.getNode(), E.getKind()});
+ for (auto DeadTarget : DeadTargets) {
+ Node &TargetN = *DeadTarget.getPointer();
+ bool IsCall = DeadTarget.getInt() == Edge::Call;
+ SCC &TargetC = *G.lookupSCC(TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ if (&TargetRC != RC) {
+ RC->removeOutgoingEdge(N, TargetN);
+ if (DebugLogging)
+ dbgs() << "Deleting outgoing edge from '" << N << "' to '" << TargetN
+ << "'\n";
+ continue;
+ }
+ if (DebugLogging)
+ dbgs() << "Deleting internal " << (IsCall ? "call" : "ref")
+ << " edge from '" << N << "' to '" << TargetN << "'\n";
+
+ if (IsCall) {
+ if (C != &TargetC) {
+ // For separate SCCs this is trivial.
+ RC->switchTrivialInternalEdgeToRef(N, TargetN);
+ } else {
+ // Otherwise we may end up re-structuring the call graph. First,
+ // invalidate any SCC analyses. We have to do this before we split
+ // functions into new SCCs and lose track of where their analyses are
+ // cached.
+ // FIXME: We should accept a more precise preserved set here. For
+ // example, it might be possible to preserve some function analyses
+ // even as the SCC structure is changed.
+ AM.invalidate(*C, PreservedAnalyses::none());
+ // Now update the call graph.
+ C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
+ N, C, AM, UR, DebugLogging);
+ }
+ }
+
+ auto NewRefSCCs = RC->removeInternalRefEdge(N, TargetN);
+ if (!NewRefSCCs.empty()) {
+ // Note that we don't bother to invalidate analyses as ref-edge
+ // connectivity is not really observable in any way and is intended
+ // exclusively to be used for ordering of transforms rather than for
+ // analysis conclusions.
+
+ // The RC worklist is in reverse postorder, so we first enqueue the
+ // current RefSCC as it will remain the parent of all split RefSCCs, then
+ // we enqueue the new ones in RPO except for the one which contains the
+ // source node as that is the "bottom" we will continue processing in the
+ // bottom-up walk.
+ UR.RCWorklist.insert(RC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing the existing RefSCC in the update worklist: "
+ << *RC << "\n";
+ // Update the RC to the "bottom".
+ assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!");
+ RC = &C->getOuterRefSCC();
+ assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!");
+ assert(NewRefSCCs.front() == RC &&
+ "New current RefSCC not first in the returned list!");
+ for (RefSCC *NewRC : reverse(
+ make_range(std::next(NewRefSCCs.begin()), NewRefSCCs.end()))) {
+ assert(NewRC != RC && "Should not encounter the current RefSCC further "
+ "in the postorder list of new RefSCCs.");
+ UR.RCWorklist.insert(NewRC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing a new RefSCC in the update worklist: " << *NewRC
+ << "\n";
+ }
+ }
+ }
+
+ // Next demote all the call edges that are now ref edges. This helps make
+ // the SCCs small which should minimize the work below as we don't want to
+ // form cycles that this would break.
+ for (Function *RefTarget : DemotedCallTargets) {
+ Node &TargetN = *G.lookup(*RefTarget);
+ SCC &TargetC = *G.lookupSCC(TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ // The easy case is when the target RefSCC is not this RefSCC. This is
+ // only supported when the target RefSCC is a child of this RefSCC.
+ if (&TargetRC != RC) {
+ assert(RC->isAncestorOf(TargetRC) &&
+ "Cannot potentially form RefSCC cycles here!");
+ RC->switchOutgoingEdgeToRef(N, TargetN);
+ if (DebugLogging)
+ dbgs() << "Switch outgoing call edge to a ref edge from '" << N
+ << "' to '" << TargetN << "'\n";
+ continue;
+ }
+
+ // We are switching an internal call edge to a ref edge. This may split up
+ // some SCCs.
+ if (C != &TargetC) {
+ // For separate SCCs this is trivial.
+ RC->switchTrivialInternalEdgeToRef(N, TargetN);
+ continue;
+ }
+
+ // Otherwise we may end up re-structuring the call graph. First, invalidate
+ // any SCC analyses. We have to do this before we split functions into new
+ // SCCs and lose track of where their analyses are cached.
+ // FIXME: We should accept a more precise preserved set here. For example,
+ // it might be possible to preserve some function analyses even as the SCC
+ // structure is changed.
+ AM.invalidate(*C, PreservedAnalyses::none());
+ // Now update the call graph.
+ C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
+ N, C, AM, UR, DebugLogging);
+ }
+
+ // Now promote ref edges into call edges.
+ for (Function *CallTarget : PromotedRefTargets) {
+ Node &TargetN = *G.lookup(*CallTarget);
+ SCC &TargetC = *G.lookupSCC(TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ // The easy case is when the target RefSCC is not this RefSCC. This is
+ // only supported when the target RefSCC is a child of this RefSCC.
+ if (&TargetRC != RC) {
+ assert(RC->isAncestorOf(TargetRC) &&
+ "Cannot potentially form RefSCC cycles here!");
+ RC->switchOutgoingEdgeToCall(N, TargetN);
+ if (DebugLogging)
+ dbgs() << "Switch outgoing ref edge to a call edge from '" << N
+ << "' to '" << TargetN << "'\n";
+ continue;
+ }
+ if (DebugLogging)
+ dbgs() << "Switch an internal ref edge to a call edge from '" << N
+ << "' to '" << TargetN << "'\n";
+
+ // Otherwise we are switching an internal ref edge to a call edge. This
+ // may merge away some SCCs, and we add those to the UpdateResult. We also
+ // need to make sure to update the worklist in the event SCCs have moved
+ // before the current one in the post-order sequence.
+ auto InitialSCCIndex = RC->find(*C) - RC->begin();
+ auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, TargetN);
+ if (!InvalidatedSCCs.empty()) {
+ C = &TargetC;
+ assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+
+ // Any analyses cached for this SCC are no longer precise as the shape
+ // has changed by introducing this cycle.
+ AM.invalidate(*C, PreservedAnalyses::none());
+
+ for (SCC *InvalidatedC : InvalidatedSCCs) {
+ assert(InvalidatedC != C && "Cannot invalidate the current SCC!");
+ UR.InvalidatedSCCs.insert(InvalidatedC);
+
+ // Also clear any cached analyses for the SCCs that are dead. This
+ // isn't really necessary for correctness but can release memory.
+ AM.clear(*InvalidatedC);
+ }
+ }
+ auto NewSCCIndex = RC->find(*C) - RC->begin();
+ if (InitialSCCIndex < NewSCCIndex) {
+ // Put our current SCC back onto the worklist as we'll visit other SCCs
+ // that are now definitively ordered prior to the current one in the
+ // post-order sequence, and may end up observing more precise context to
+ // optimize the current SCC.
+ UR.CWorklist.insert(C);
+ if (DebugLogging)
+ dbgs() << "Enqueuing the existing SCC in the worklist: " << *C << "\n";
+ // Enqueue in reverse order as we pop off the back of the worklist.
+ for (SCC &MovedC : reverse(make_range(RC->begin() + InitialSCCIndex,
+ RC->begin() + NewSCCIndex))) {
+ UR.CWorklist.insert(&MovedC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing a newly earlier in post-order SCC: " << MovedC
+ << "\n";
+ }
+ }
+ }
+
+ assert(!UR.InvalidatedSCCs.count(C) && "Invalidated the current SCC!");
+ assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!");
+ assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!");
+
+ // Record the current RefSCC and SCC for higher layers of the CGSCC pass
+ // manager now that all the updates have been applied.
+ if (RC != &InitialRC)
+ UR.UpdatedRC = RC;
+ if (C != &InitialC)
+ UR.UpdatedC = C;
+
+ return *C;
}
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 57ad437ef4fd..08d50c29dfc8 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_library(LLVMAnalysis
Interval.cpp
IntervalPartition.cpp
IteratedDominanceFrontier.cpp
+ LazyBranchProbabilityInfo.cpp
LazyBlockFrequencyInfo.cpp
LazyCallGraph.cpp
LazyValueInfo.cpp
@@ -82,6 +83,7 @@ add_llvm_library(LLVMAnalysis
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Analysis
- )
-add_dependencies(LLVMAnalysis intrinsics_gen)
+ DEPENDS
+ intrinsics_gen
+ )
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 39cb86d2ccb1..458b7bfae959 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -258,10 +258,10 @@ void CallGraphNode::replaceCallEdge(CallSite CS,
}
// Provide an explicit template instantiation for the static ID.
-char CallGraphAnalysis::PassID;
+AnalysisKey CallGraphAnalysis::Key;
PreservedAnalyses CallGraphPrinterPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
AM.getResult<CallGraphAnalysis>(M).print(OS);
return PreservedAnalyses::all();
}
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 69d767354785..9cef78144150 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -67,9 +67,7 @@ public:
Info.setPreservesAll();
}
- const char *getPassName() const override {
- return "CallGraph Pass Manager";
- }
+ StringRef getPassName() const override { return "CallGraph Pass Manager"; }
PMDataManager *getAsPMDataManager() override { return this; }
Pass *getAsPass() override { return this; }
@@ -100,7 +98,7 @@ private:
bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
CallGraph &CG, bool &CallGraphUpToDate,
bool &DevirtualizedCall);
- bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool IsCheckingMode);
};
@@ -175,8 +173,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
/// a function pass like GVN optimizes away stuff feeding the indirect call.
/// This never happens in checking mode.
///
-bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
- CallGraph &CG, bool CheckingMode) {
+bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
+ bool CheckingMode) {
DenseMap<Value*, CallGraphNode*> CallSites;
DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
@@ -450,7 +448,7 @@ bool CGPassManager::runOnModule(Module &M) {
// Copy the current SCC and increment past it so that the pass can hack
// on the SCC if it wants to without invalidating our iterator.
const std::vector<CallGraphNode *> &NodeVec = *CGI;
- CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size());
+ CurSCC.initialize(NodeVec);
++CGI;
// At the top level, we run all the passes in this pass manager on the
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index ed8370498dd0..bdffdd8eb270 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -27,36 +27,45 @@
using namespace llvm;
-static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
- SmallPtrSetImpl<const Value*> &EphValues) {
- SmallPtrSet<const Value *, 32> Visited;
-
- // Make sure that all of the items in WorkSet are in our EphValues set.
- EphValues.insert(WorkSet.begin(), WorkSet.end());
+static void
+appendSpeculatableOperands(const Value *V,
+ SmallPtrSetImpl<const Value *> &Visited,
+ SmallVectorImpl<const Value *> &Worklist) {
+ const User *U = dyn_cast<User>(V);
+ if (!U)
+ return;
+
+ for (const Value *Operand : U->operands())
+ if (Visited.insert(Operand).second)
+ if (isSafeToSpeculativelyExecute(Operand))
+ Worklist.push_back(Operand);
+}
+static void completeEphemeralValues(SmallPtrSetImpl<const Value *> &Visited,
+ SmallVectorImpl<const Value *> &Worklist,
+ SmallPtrSetImpl<const Value *> &EphValues) {
// Note: We don't speculate PHIs here, so we'll miss instruction chains kept
// alive only by ephemeral values.
- while (!WorkSet.empty()) {
- const Value *V = WorkSet.front();
- WorkSet.erase(WorkSet.begin());
+ // Walk the worklist using an index but without caching the size so we can
+ // append more entries as we process the worklist. This forms a queue without
+ // quadratic behavior by just leaving processed nodes at the head of the
+ // worklist forever.
+ for (int i = 0; i < (int)Worklist.size(); ++i) {
+ const Value *V = Worklist[i];
- if (!Visited.insert(V).second)
- continue;
+ assert(Visited.count(V) &&
+ "Failed to add a worklist entry to our visited set!");
// If all uses of this value are ephemeral, then so is this value.
- if (!std::all_of(V->user_begin(), V->user_end(),
- [&](const User *U) { return EphValues.count(U); }))
+ if (!all_of(V->users(), [&](const User *U) { return EphValues.count(U); }))
continue;
EphValues.insert(V);
DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
- if (const User *U = dyn_cast<User>(V))
- for (const Value *J : U->operands()) {
- if (isSafeToSpeculativelyExecute(J))
- WorkSet.push_back(J);
- }
+ // Append any more operands to consider.
+ appendSpeculatableOperands(V, Visited, Worklist);
}
}
@@ -64,29 +73,32 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
void CodeMetrics::collectEphemeralValues(
const Loop *L, AssumptionCache *AC,
SmallPtrSetImpl<const Value *> &EphValues) {
- SmallVector<const Value *, 16> WorkSet;
+ SmallPtrSet<const Value *, 32> Visited;
+ SmallVector<const Value *, 16> Worklist;
for (auto &AssumeVH : AC->assumptions()) {
if (!AssumeVH)
continue;
Instruction *I = cast<Instruction>(AssumeVH);
- // Filter out call sites outside of the loop so we don't to a function's
+ // Filter out call sites outside of the loop so we don't do a function's
// worth of work for each of its loops (and, in the common case, ephemeral
// values in the loop are likely due to @llvm.assume calls in the loop).
if (!L->contains(I->getParent()))
continue;
- WorkSet.push_back(I);
+ if (EphValues.insert(I).second)
+ appendSpeculatableOperands(I, Visited, Worklist);
}
- completeEphemeralValues(WorkSet, EphValues);
+ completeEphemeralValues(Visited, Worklist, EphValues);
}
void CodeMetrics::collectEphemeralValues(
const Function *F, AssumptionCache *AC,
SmallPtrSetImpl<const Value *> &EphValues) {
- SmallVector<const Value *, 16> WorkSet;
+ SmallPtrSet<const Value *, 32> Visited;
+ SmallVector<const Value *, 16> Worklist;
for (auto &AssumeVH : AC->assumptions()) {
if (!AssumeVH)
@@ -94,17 +106,19 @@ void CodeMetrics::collectEphemeralValues(
Instruction *I = cast<Instruction>(AssumeVH);
assert(I->getParent()->getParent() == F &&
"Found assumption for the wrong function!");
- WorkSet.push_back(I);
+
+ if (EphValues.insert(I).second)
+ appendSpeculatableOperands(I, Visited, Worklist);
}
- completeEphemeralValues(WorkSet, EphValues);
+ completeEphemeralValues(Visited, Worklist, EphValues);
}
/// Fill in the current structure with information gleaned from the specified
/// block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
const TargetTransformInfo &TTI,
- SmallPtrSetImpl<const Value*> &EphValues) {
+ const SmallPtrSetImpl<const Value*> &EphValues) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
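[editor's note] The rewritten completeEphemeralValues above walks the worklist by index instead of erasing from the front. A standalone sketch of that shape (std::vector in place of SmallVector): processed entries are left in place and new work is appended during iteration, giving queue behavior without the quadratic erase-from-front cost.

#include <vector>

void drain(std::vector<int> &Worklist) {
  // Re-read size() every iteration: the body may grow the vector.
  for (unsigned i = 0; i < Worklist.size(); ++i) {
    int V = Worklist[i]; // copy out: push_back below may reallocate
    if (V > 1)
      Worklist.push_back(V / 2);
  }
}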
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index c9adaa7b111c..73867279abe4 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -17,29 +17,38 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
#include <cfenv>
#include <cmath>
-#include <limits>
+#include <cstddef>
+#include <cstdint>
using namespace llvm;
@@ -49,6 +58,36 @@ namespace {
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//
+static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
+ Constant *C, Type *SrcEltTy,
+ unsigned NumSrcElts,
+ const DataLayout &DL) {
+ // Now that we know that the input value is a vector of integers, just shift
+ // and insert them into our result.
+ unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ Constant *Element;
+ if (DL.isLittleEndian())
+ Element = C->getAggregateElement(NumSrcElts - i - 1);
+ else
+ Element = C->getAggregateElement(i);
+
+ if (Element && isa<UndefValue>(Element)) {
+ Result <<= BitShift;
+ continue;
+ }
+
+ auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
+ if (!ElementCI)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ Result <<= BitShift;
+ Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth());
+ }
+
+ return nullptr;
+}
+
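[editor's note] A worked example of foldConstVectorToAPInt's shift-and-insert loop, standalone with uint32_t in place of APInt: on a little-endian target element 0 of the vector occupies the low bits of the scalar, which is why the loop reads elements from the highest-addressed one down.

#include <cstdint>

// Bitcasting <2 x i16> <0x1111, 0x2222> to i32 with little-endian layout.
uint32_t foldVectorLE(uint16_t Elt0, uint16_t Elt1) {
  uint32_t Result = 0;
  Result = (Result << 16) | Elt1; // highest-addressed element first
  Result = (Result << 16) | Elt0; // element 0 lands in the low bits
  return Result;                  // 0x22221111 for <0x1111, 0x2222>
}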
/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
@@ -60,45 +99,33 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
!DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
return Constant::getAllOnesValue(DestTy);
- // Handle a vector->integer cast.
- if (auto *IT = dyn_cast<IntegerType>(DestTy)) {
- auto *VTy = dyn_cast<VectorType>(C->getType());
- if (!VTy)
- return ConstantExpr::getBitCast(C, DestTy);
+ if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
+ // Handle a vector->scalar integer/fp cast.
+ if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
+ unsigned NumSrcElts = VTy->getNumElements();
+ Type *SrcEltTy = VTy->getElementType();
+
+ // If the vector is a vector of floating point, convert it to vector of int
+ // to simplify things.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+ // Ask IR to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ }
- unsigned NumSrcElts = VTy->getNumElements();
- Type *SrcEltTy = VTy->getElementType();
-
- // If the vector is a vector of floating point, convert it to vector of int
- // to simplify things.
- if (SrcEltTy->isFloatingPointTy()) {
- unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
- Type *SrcIVTy =
- VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
- // Ask IR to do the conversion now that #elts line up.
- C = ConstantExpr::getBitCast(C, SrcIVTy);
- }
+ APInt Result(DL.getTypeSizeInBits(DestTy), 0);
+ if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
+ SrcEltTy, NumSrcElts, DL))
+ return CE;
- // Now that we know that the input value is a vector of integers, just shift
- // and insert them into our result.
- unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
- APInt Result(IT->getBitWidth(), 0);
- for (unsigned i = 0; i != NumSrcElts; ++i) {
- Constant *Element;
- if (DL.isLittleEndian())
- Element = C->getAggregateElement(NumSrcElts-i-1);
- else
- Element = C->getAggregateElement(i);
-
- auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
- if (!ElementCI)
- return ConstantExpr::getBitCast(C, DestTy);
+ if (isa<IntegerType>(DestTy))
+ return ConstantInt::get(DestTy, Result);
- Result <<= BitShift;
- Result |= ElementCI->getValue().zextOrSelf(IT->getBitWidth());
+ APFloat FP(DestTy->getFltSemantics(), Result);
+ return ConstantFP::get(DestTy->getContext(), FP);
}
-
- return ConstantInt::get(IT, Result);
}
// The code below only handles casts to vectors currently.
@@ -180,7 +207,11 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
Constant *Elt = Zero;
unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
for (unsigned j = 0; j != Ratio; ++j) {
- Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++));
+ Constant *Src = C->getAggregateElement(SrcElt++);
+ if (Src && isa<UndefValue>(Src))
+ Src = Constant::getNullValue(C->getType()->getVectorElementType());
+ else
+ Src = dyn_cast_or_null<ConstantInt>(Src);
if (!Src) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
@@ -206,8 +237,19 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
- auto *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i));
- if (!Src) // Reject constantexpr elements.
+ auto *Element = C->getAggregateElement(i);
+
+ if (!Element) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ if (isa<UndefValue>(Element)) {
+      // Correctly propagate undef values.
+ Result.append(Ratio, UndefValue::get(DstEltTy));
+ continue;
+ }
+
+ auto *Src = dyn_cast<ConstantInt>(Element);
+ if (!Src)
return ConstantExpr::getBitCast(C, DestTy);
unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
@@ -333,7 +375,7 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
uint64_t CurEltOffset = SL->getElementOffset(Index);
ByteOffset -= CurEltOffset;
- while (1) {
+ while (true) {
// If the element access is to the element itself and not to tail padding,
// read the bytes from the element.
uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
@@ -689,23 +731,27 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
- Type *ResultTy, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ Type *ResultTy, Optional<unsigned> InRangeIndex,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
Type *IntPtrTy = DL.getIntPtrType(ResultTy);
+ Type *IntPtrScalarTy = IntPtrTy->getScalarType();
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
- !isa<StructType>(GetElementPtrInst::getIndexedType(SrcElemTy,
- Ops.slice(1, i - 1)))) &&
- Ops[i]->getType() != IntPtrTy) {
+ !isa<StructType>(GetElementPtrInst::getIndexedType(
+ SrcElemTy, Ops.slice(1, i - 1)))) &&
+ Ops[i]->getType()->getScalarType() != IntPtrScalarTy) {
Any = true;
+ Type *NewType = Ops[i]->getType()->isVectorTy()
+ ? IntPtrTy
+ : IntPtrTy->getScalarType();
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
true,
- IntPtrTy,
+ NewType,
true),
- Ops[i], IntPtrTy));
+ Ops[i], NewType));
} else
NewIdxs.push_back(Ops[i]);
}
@@ -713,11 +759,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
if (!Any)
return nullptr;
- Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs);
- if (auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
- C = Folded;
- }
+ Constant *C = ConstantExpr::getGetElementPtr(
+ SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex);
+ if (Constant *Folded = ConstantFoldConstant(C, DL, TLI))
+ C = Folded;
return C;
}
@@ -744,13 +789,17 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
+ const GEPOperator *InnermostGEP = GEP;
+ bool InBounds = GEP->isInBounds();
+
Type *SrcElemTy = GEP->getSourceElementType();
Type *ResElemTy = GEP->getResultElementType();
Type *ResTy = GEP->getType();
if (!SrcElemTy->isSized())
return nullptr;
- if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, DL, TLI))
+ if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
+ GEP->getInRangeIndex(), DL, TLI))
return C;
Constant *Ptr = Ops[0];
@@ -775,8 +824,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResTy);
- if (auto *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, DL, TLI);
+ if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
+ Res = FoldedRes;
return Res;
}
}
@@ -793,6 +842,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// If this is a GEP of a GEP, fold it all into a single GEP.
while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ InnermostGEP = GEP;
+ InBounds &= GEP->isInBounds();
+
SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());
     // Do not try to incorporate the sub-GEP if some index is not a number.
@@ -821,7 +873,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
}
}
- if (Ptr->isNullValue() || BasePtr != 0) {
+ auto *PTy = cast<PointerType>(Ptr->getType());
+ if ((Ptr->isNullValue() || BasePtr != 0) &&
+ !DL.isNonIntegralPointerType(PTy)) {
Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
return ConstantExpr::getIntToPtr(C, ResTy);
}
@@ -830,8 +884,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// we eliminate over-indexing of the notional static type array bounds.
// This makes it easy to determine if the getelementptr is "inbounds".
// Also, this helps GlobalOpt do SROA on GlobalVariables.
- Type *Ty = Ptr->getType();
- assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
+ Type *Ty = PTy;
SmallVector<Constant *, 32> NewIdxs;
do {
@@ -897,8 +950,23 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
if (Offset != 0)
return nullptr;
+ // Preserve the inrange index from the innermost GEP if possible. We must
+ // have calculated the same indices up to and including the inrange index.
+ Optional<unsigned> InRangeIndex;
+ if (Optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex())
+ if (SrcElemTy == InnermostGEP->getSourceElementType() &&
+ NewIdxs.size() > *LastIRIndex) {
+ InRangeIndex = LastIRIndex;
+ for (unsigned I = 0; I <= *LastIRIndex; ++I)
+ if (NewIdxs[I] != InnermostGEP->getOperand(I + 1)) {
+ InRangeIndex = None;
+ break;
+ }
+ }
+
// Create a GEP.
- Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
+ InBounds, InRangeIndex);
assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
@@ -916,15 +984,16 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
///
-/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
-/// information, due to only being passed an opcode and operands. Constant
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/inrange
+/// etc information, due to only being passed an opcode and operands. Constant
/// folding using this function strips this information.
///
-Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy,
- unsigned Opcode,
+Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
+ Type *DestTy = InstOrCE->getType();
+
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode))
return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
@@ -936,10 +1005,14 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy,
if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
return C;
- return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(),
- Ops[0], Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
+ Ops.slice(1), GEP->isInBounds(),
+ GEP->getInRangeIndex());
}
+ if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
+ return CE->getWithOperands(Ops);
+
switch (Opcode) {
default: return nullptr;
case Instruction::ICmp:
@@ -966,12 +1039,58 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy,
// Constant Folding public APIs
//===----------------------------------------------------------------------===//
+namespace {
+
+Constant *
+ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ SmallDenseMap<Constant *, Constant *> &FoldedOps) {
+ if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
+ return nullptr;
+
+ SmallVector<Constant *, 8> Ops;
+ for (const Use &NewU : C->operands()) {
+ auto *NewC = cast<Constant>(&NewU);
+ // Recursively fold the ConstantExpr's operands. If we have already folded
+ // a ConstantExpr, we don't have to process it again.
+ if (isa<ConstantVector>(NewC) || isa<ConstantExpr>(NewC)) {
+ auto It = FoldedOps.find(NewC);
+ if (It == FoldedOps.end()) {
+ if (auto *FoldedC =
+ ConstantFoldConstantImpl(NewC, DL, TLI, FoldedOps)) {
+          FoldedOps.insert({NewC, FoldedC});
+          NewC = FoldedC;
+ } else {
+ FoldedOps.insert({NewC, NewC});
+ }
+ } else {
+ NewC = It->second;
+ }
+ }
+ Ops.push_back(NewC);
+ }
+
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ DL, TLI);
+
+ return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI);
+ }
+
+ assert(isa<ConstantVector>(C));
+ return ConstantVector::get(Ops);
+}
+
+} // end anonymous namespace
+
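[editor's note] The FoldedOps map threaded through ConstantFoldConstantImpl memoizes results across the constant-expression DAG, so a subexpression shared by many operands is folded only once, keyed by the original node. A minimal standalone sketch of the same shape (the Expr node and foldOnce step are assumptions for illustration, not LLVM API):

#include <unordered_map>

struct Expr { Expr *L = nullptr, *R = nullptr; };
Expr *foldOnce(Expr *E); // assumed: folds a single node or returns nullptr

Expr *foldRec(Expr *E, std::unordered_map<Expr *, Expr *> &Memo) {
  if (!E)
    return nullptr;
  auto It = Memo.find(E);
  if (It != Memo.end())
    return It->second;   // shared subexpression: reuse the cached fold
  E->L = foldRec(E->L, Memo);
  E->R = foldRec(E->R, Memo);
  Expr *F = foldOnce(E);
  Memo[E] = F ? F : E;   // cache keyed by the original node
  return Memo[E];
}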
Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (auto *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = nullptr;
+ SmallDenseMap<Constant *, Constant *> FoldedOps;
for (Value *Incoming : PN->incoming_values()) {
// If the incoming value is undef then skip it. Note that while we could
// skip the value if it is equal to the phi node itself we choose not to
@@ -984,8 +1103,8 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
if (!C)
return nullptr;
// Fold the PHI's operands.
- if (auto *NewC = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(NewC, DL, TLI);
+ if (auto *FoldedC = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps))
+ C = FoldedC;
// If the incoming value is a different constant to
// the one we saw previously, then give up.
if (CommonValue && C != CommonValue)
@@ -993,7 +1112,6 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
CommonValue = C;
}
-
// If we reach here, all incoming values are the same constant or undef.
return CommonValue ? CommonValue : UndefValue::get(PN->getType());
}
@@ -1003,12 +1121,13 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))
return nullptr;
+ SmallDenseMap<Constant *, Constant *> FoldedOps;
SmallVector<Constant *, 8> Ops;
for (const Use &OpU : I->operands()) {
auto *Op = cast<Constant>(&OpU);
// Fold the Instruction's operands.
- if (auto *NewCE = dyn_cast<ConstantExpr>(Op))
- Op = ConstantFoldConstantExpression(NewCE, DL, TLI);
+ if (auto *FoldedOp = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps))
+ Op = FoldedOp;
Ops.push_back(Op);
}
@@ -1036,55 +1155,17 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
return ConstantFoldInstOperands(I, Ops, DL, TLI);
}
-namespace {
-
-Constant *
-ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
- SmallVector<Constant *, 8> Ops;
- for (const Use &NewU : CE->operands()) {
- auto *NewC = cast<Constant>(&NewU);
- // Recursively fold the ConstantExpr's operands. If we have already folded
- // a ConstantExpr, we don't have to process it again.
- if (auto *NewCE = dyn_cast<ConstantExpr>(NewC)) {
- if (FoldedOps.insert(NewCE).second)
- NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps);
- }
- Ops.push_back(NewC);
- }
-
- if (CE->isCompare())
- return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- DL, TLI);
-
- return ConstantFoldInstOperandsImpl(CE, CE->getType(), CE->getOpcode(), Ops,
- DL, TLI);
-}
-
-} // end anonymous namespace
-
-Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- SmallPtrSet<ConstantExpr *, 4> FoldedOps;
- return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps);
+Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ SmallDenseMap<Constant *, Constant *> FoldedOps;
+ return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
}
Constant *llvm::ConstantFoldInstOperands(Instruction *I,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- return ConstantFoldInstOperandsImpl(I, I->getType(), I->getOpcode(), Ops, DL,
- TLI);
-}
-
-Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
- ArrayRef<Constant *> Ops,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- assert(Opcode != Instruction::GetElementPtr && "Invalid for GEPs");
- return ConstantFoldInstOperandsImpl(nullptr, DestTy, Opcode, Ops, DL, TLI);
+ return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
}
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
@@ -1350,6 +1431,8 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
Name == "log10f";
case 'p':
return Name == "pow" || Name == "powf";
+ case 'r':
+ return Name == "round" || Name == "roundf";
case 's':
return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
@@ -1364,7 +1447,7 @@ Constant *GetConstantFoldFPValue(double V, Type *Ty) {
if (Ty->isHalfTy()) {
APFloat APF(V);
bool unused;
- APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+ APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &unused);
return ConstantFP::get(Ty->getContext(), APF);
}
if (Ty->isFloatTy())
@@ -1455,7 +1538,7 @@ double getValueAsDouble(ConstantFP *Op) {
bool unused;
APFloat APF = Op->getValueAPF();
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
return APF.convertToDouble();
}
@@ -1473,7 +1556,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
APFloat Val(Op->getValueAPF());
bool lost = false;
- Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
}
@@ -1614,6 +1697,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
}
}
break;
+  case 'r':
+    if ((Name == "round" && TLI->has(LibFunc::round)) ||
+        (Name == "roundf" && TLI->has(LibFunc::roundf)))
+      return ConstantFoldFP(round, V, Ty);
+    break;
case 's':
if ((Name == "sin" && TLI->has(LibFunc::sin)) ||
(Name == "sinf" && TLI->has(LibFunc::sinf)))
@@ -1648,7 +1735,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
case Intrinsic::bitreverse:
return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
case Intrinsic::convert_from_fp16: {
- APFloat Val(APFloat::IEEEhalf, Op->getValue());
+ APFloat Val(APFloat::IEEEhalf(), Op->getValue());
bool lost = false;
APFloat::opStatus status = Val.convert(
@@ -1927,3 +2014,152 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI);
}
+
+bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
+ // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
+ // (and to some extent ConstantFoldScalarCall).
+ Function *F = CS.getCalledFunction();
+ if (!F)
+ return false;
+
+ LibFunc::Func Func;
+ if (!TLI || !TLI->getLibFunc(*F, Func))
+ return false;
+
+ if (CS.getNumArgOperands() == 1) {
+ if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) {
+ const APFloat &Op = OpC->getValueAPF();
+ switch (Func) {
+ case LibFunc::logl:
+ case LibFunc::log:
+ case LibFunc::logf:
+ case LibFunc::log2l:
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log10l:
+ case LibFunc::log10:
+ case LibFunc::log10f:
+ return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
+
+ case LibFunc::expl:
+ case LibFunc::exp:
+ case LibFunc::expf:
+ // FIXME: These boundaries are slightly conservative.
+ if (OpC->getType()->isDoubleTy())
+ return Op.compare(APFloat(-745.0)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(709.0)) != APFloat::cmpGreaterThan;
+ if (OpC->getType()->isFloatTy())
+ return Op.compare(APFloat(-103.0f)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(88.0f)) != APFloat::cmpGreaterThan;
+ break;
+
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ // FIXME: These boundaries are slightly conservative.
+ if (OpC->getType()->isDoubleTy())
+ return Op.compare(APFloat(-1074.0)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(1023.0)) != APFloat::cmpGreaterThan;
+ if (OpC->getType()->isFloatTy())
+ return Op.compare(APFloat(-149.0f)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(127.0f)) != APFloat::cmpGreaterThan;
+ break;
+
+ case LibFunc::sinl:
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::cosl:
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ return !Op.isInfinity();
+
+ case LibFunc::tanl:
+ case LibFunc::tan:
+ case LibFunc::tanf: {
+ // FIXME: Stop using the host math library.
+ // FIXME: The computation isn't done in the right precision.
+ Type *Ty = OpC->getType();
+ if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
+ double OpV = getValueAsDouble(OpC);
+ return ConstantFoldFP(tan, OpV, Ty) != nullptr;
+ }
+ break;
+ }
+
+ case LibFunc::asinl:
+ case LibFunc::asin:
+ case LibFunc::asinf:
+ case LibFunc::acosl:
+ case LibFunc::acos:
+ case LibFunc::acosf:
+ return Op.compare(APFloat(Op.getSemantics(), "-1")) !=
+ APFloat::cmpLessThan &&
+ Op.compare(APFloat(Op.getSemantics(), "1")) !=
+ APFloat::cmpGreaterThan;
+
+ case LibFunc::sinh:
+ case LibFunc::cosh:
+ case LibFunc::sinhf:
+ case LibFunc::coshf:
+ case LibFunc::sinhl:
+ case LibFunc::coshl:
+ // FIXME: These boundaries are slightly conservative.
+ if (OpC->getType()->isDoubleTy())
+ return Op.compare(APFloat(-710.0)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(710.0)) != APFloat::cmpGreaterThan;
+ if (OpC->getType()->isFloatTy())
+ return Op.compare(APFloat(-89.0f)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(89.0f)) != APFloat::cmpGreaterThan;
+ break;
+
+ case LibFunc::sqrtl:
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ return Op.isNaN() || Op.isZero() || !Op.isNegative();
+
+ // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
+ // maybe others?
+ default:
+ break;
+ }
+ }
+ }
+
+ if (CS.getNumArgOperands() == 2) {
+ ConstantFP *Op0C = dyn_cast<ConstantFP>(CS.getArgOperand(0));
+ ConstantFP *Op1C = dyn_cast<ConstantFP>(CS.getArgOperand(1));
+ if (Op0C && Op1C) {
+ const APFloat &Op0 = Op0C->getValueAPF();
+ const APFloat &Op1 = Op1C->getValueAPF();
+
+ switch (Func) {
+ case LibFunc::powl:
+ case LibFunc::pow:
+ case LibFunc::powf: {
+ // FIXME: Stop using the host math library.
+ // FIXME: The computation isn't done in the right precision.
+ Type *Ty = Op0C->getType();
+ if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
+ if (Ty == Op1C->getType()) {
+ double Op0V = getValueAsDouble(Op0C);
+ double Op1V = getValueAsDouble(Op1C);
+ return ConstantFoldBinaryFP(pow, Op0V, Op1V, Ty) != nullptr;
+ }
+ }
+ break;
+ }
+
+ case LibFunc::fmodl:
+ case LibFunc::fmod:
+ case LibFunc::fmodf:
+ return Op0.isNaN() || Op1.isNaN() ||
+ (!Op0.isInfinity() && !Op1.isZero());
+
+ default:
+ break;
+ }
+ }
+ }
+
+ return false;
+}
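[editor's note] Rough provenance of the exp() guard values above, which the FIXMEs call slightly conservative: 709 sits just under ln(DBL_MAX), and -745 is just past the natural log of the smallest denormal double, so between the two the host exp() stays finite and nonzero. A standalone check (assumes C++17's DBL_TRUE_MIN):

#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
  std::printf("ln(DBL_MAX)      = %.2f\n", std::log(DBL_MAX));      // ~709.78
  std::printf("ln(DBL_TRUE_MIN) = %.2f\n", std::log(DBL_TRUE_MIN)); // ~-744.44
}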
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 68a4bea96baa..67d1773f0811 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -89,14 +90,35 @@ CostModelAnalysis::runOnFunction(Function &F) {
return false;
}
-static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
+static bool isReverseVectorMask(ArrayRef<int> Mask) {
for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
- if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
+ if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
return false;
return true;
}
-static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
+static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
+ bool Vec0 = false;
+ bool Vec1 = false;
+ for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
+ if (Mask[i] >= 0) {
+ if ((unsigned)Mask[i] >= NumVecElts)
+ Vec1 = true;
+ else
+ Vec0 = true;
+ }
+ }
+ return !(Vec0 && Vec1);
+}
+
+static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
+ for (unsigned i = 0; i < Mask.size(); ++i)
+ if (Mask[i] > 0)
+ return false;
+ return true;
+}
+
+static bool isAlternateVectorMask(ArrayRef<int> Mask) {
bool isAlternate = true;
unsigned MaskSize = Mask.size();
@@ -123,7 +145,7 @@ static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
TargetTransformInfo::OperandValueKind OpInfo =
- TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OK_AnyValue;
  // Check for a splat of a constant or for a non-uniform vector of constants.
if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
@@ -132,6 +154,12 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
OpInfo = TargetTransformInfo::OK_UniformConstantValue;
}
+ // Check for a splat of a uniform value. This is not loop aware, so return
+ // true only for the obviously uniform cases (argument, globalvalue)
+ const Value *Splat = getSplatValue(V);
+ if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
+ OpInfo = TargetTransformInfo::OK_UniformValue;
+
return OpInfo;
}
@@ -494,6 +522,17 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
if (isAlternateVectorMask(Mask))
return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
VecTypOp0, 0, nullptr);
+
+ if (isZeroEltBroadcastVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast,
+ VecTypOp0, 0, nullptr);
+
+ if (isSingleSourceVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ VecTypOp0, 0, nullptr);
+
+ return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ VecTypOp0, 0, nullptr);
}
return -1;
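[editor's note] Illustrative inputs for the new mask classifiers, with a standalone copy of the reverse check (std::vector in place of ArrayRef). Note the > 0 to >= 0 change above: a lane index of 0 is now checked like any other lane, and only -1 (undef) acts as a wildcard.

#include <cassert>
#include <vector>

static bool isReverse(const std::vector<int> &M) {
  for (unsigned i = 0, e = M.size(); i != e; ++i)
    if (M[i] >= 0 && M[i] != (int)(e - 1 - i))
      return false;
  return true;
}

int main() {
  std::vector<int> Rev = {3, 2, 1, 0};
  std::vector<int> RevUndef = {3, -1, 1, 0};
  std::vector<int> Sneaky = {0, 2, 1, 0};
  assert(isReverse(Rev));
  assert(isReverse(RevUndef)); // undef lanes are wildcards
  assert(!isReverse(Sneaky));  // lane 0 is wrong; the old > 0 test missed it
}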
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index a3f8b7fda08a..688c1db534c1 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -280,10 +280,8 @@ void DemandedBits::performAnalysis() {
// add their operands to the work list (for integer values operands, mark
// all bits as live).
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- if (!AliveBits.count(&I)) {
- AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+ if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second)
Worklist.push_back(&I);
- }
continue;
}
@@ -363,8 +361,9 @@ APInt DemandedBits::getDemandedBits(Instruction *I) {
performAnalysis();
const DataLayout &DL = I->getParent()->getModule()->getDataLayout();
- if (AliveBits.count(I))
- return AliveBits[I];
+ auto Found = AliveBits.find(I);
+ if (Found != AliveBits.end())
+ return Found->second;
return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
}
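[editor's note] The two map idioms adopted above, standalone with std::unordered_map (try_emplace is C++17): insert-if-absent in a single lookup instead of count() plus operator[], and find() instead of count() plus a second lookup on the read side.

#include <unordered_map>
#include <vector>

void example(std::unordered_map<int, int> &Alive, std::vector<int> &Worklist) {
  // .second is true only when the key was newly inserted.
  if (Alive.try_emplace(42, 0).second)
    Worklist.push_back(42);

  auto Found = Alive.find(42); // one read-side lookup
  int Bits = Found != Alive.end() ? Found->second : -1;
  (void)Bits;
}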
@@ -387,10 +386,10 @@ FunctionPass *llvm::createDemandedBitsWrapperPass() {
return new DemandedBitsWrapperPass();
}
-char DemandedBitsAnalysis::PassID;
+AnalysisKey DemandedBitsAnalysis::Key;
DemandedBits DemandedBitsAnalysis::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
return DemandedBits(F, AC, DT);
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index eb4d925fea73..a332a07ce864 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -122,7 +122,7 @@ DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
return DependenceInfo(&F, &AA, &SE, &LI);
}
-char DependenceAnalysis::PassID;
+AnalysisKey DependenceAnalysis::Key;
INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
"Dependence Analysis", true, true)
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 4554374252a4..15856c3f8b7a 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -56,7 +56,7 @@ LLVM_DUMP_METHOD void DominanceFrontierWrapperPass::dump() const {
}
#endif
-char DominanceFrontierAnalysis::PassID;
+AnalysisKey DominanceFrontierAnalysis::Key;
DominanceFrontier DominanceFrontierAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp
index 5f951f5112e9..ebf0a370b0b0 100644
--- a/lib/Analysis/EHPersonalities.cpp
+++ b/lib/Analysis/EHPersonalities.cpp
@@ -40,6 +40,29 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
.Default(EHPersonality::Unknown);
}
+StringRef llvm::getEHPersonalityName(EHPersonality Pers) {
+ switch (Pers) {
+ case EHPersonality::GNU_Ada: return "__gnat_eh_personality";
+ case EHPersonality::GNU_CXX: return "__gxx_personality_v0";
+ case EHPersonality::GNU_CXX_SjLj: return "__gxx_personality_sj0";
+ case EHPersonality::GNU_C: return "__gcc_personality_v0";
+ case EHPersonality::GNU_C_SjLj: return "__gcc_personality_sj0";
+ case EHPersonality::GNU_ObjC: return "__objc_personality_v0";
+ case EHPersonality::MSVC_X86SEH: return "_except_handler3";
+ case EHPersonality::MSVC_Win64SEH: return "__C_specific_handler";
+ case EHPersonality::MSVC_CXX: return "__CxxFrameHandler3";
+ case EHPersonality::CoreCLR: return "ProcessCLRException";
+ case EHPersonality::Rust: return "rust_eh_personality";
+ case EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!");
+ }
+
+ llvm_unreachable("Invalid EHPersonality!");
+}
+
+EHPersonality llvm::getDefaultEHPersonality(const Triple &T) {
+ return EHPersonality::GNU_C;
+}
+
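[editor's note] The switch above deliberately has no default: case, so -Wswitch flags any EHPersonality enumerator added later, while the llvm_unreachable placed after the switch catches out-of-range values at runtime. The idiom in standalone form:

#include <cstdlib>

enum class Kind { A, B };

const char *name(Kind K) {
  switch (K) { // no default: the compiler warns if a Kind goes unhandled
  case Kind::A: return "a";
  case Kind::B: return "b";
  }
  std::abort(); // reachable only with a corrupt enum value
}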
bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn());
// We can't simplify any invokes to nounwind functions if the personality
@@ -82,7 +105,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
}
// Note that this is a member of the given color.
ColorVector &Colors = BlockColors[Visiting];
- if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end())
+ if (!is_contained(Colors, Color))
Colors.push_back(Color);
else
continue;
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index a7d1e048e133..33f00cb19b26 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -78,7 +78,7 @@ class GlobalsAAResult::FunctionInfo {
return (AlignedMap *)P;
}
enum { NumLowBitsAvailable = 3 };
- static_assert(AlignOf<AlignedMap>::Alignment >= (1 << NumLowBitsAvailable),
+ static_assert(alignof(AlignedMap) >= (1 << NumLowBitsAvailable),
"AlignedMap insufficiently aligned to have enough low bits.");
};
@@ -366,6 +366,10 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return true; // Allow comparison against null.
+ } else if (Constant *C = dyn_cast<Constant>(I)) {
+ // Ignore constants which don't have any live uses.
+ if (isa<GlobalValue>(C) || C->isConstantUsed())
+ return true;
} else {
return true;
}
@@ -521,7 +525,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// Can't say anything about it. However, if it is inside our SCC,
// then nothing needs to be done.
CallGraphNode *CalleeNode = CG[Callee];
- if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ if (!is_contained(SCC, CalleeNode))
KnowNothing = true;
}
} else {
@@ -857,22 +861,22 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
if (CS.doesNotAccessMemory())
return MRI_NoModRef;
ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
-
+
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
for (auto &A : CS.args()) {
SmallVector<Value*, 4> Objects;
GetUnderlyingObjects(A, Objects, DL);
-
+
// All objects must be identified.
- if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject) &&
+ if (!all_of(Objects, isIdentifiedObject) &&
// Try ::alias to see if all objects are known not to alias GV.
- !std::all_of(Objects.begin(), Objects.end(), [&](Value *V) {
+ !all_of(Objects, [&](Value *V) {
return this->alias(MemoryLocation(V), MemoryLocation(GV)) == NoAlias;
- }))
+ }))
return ConservativeResult;
- if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
+ if (is_contained(Objects, GV))
return ConservativeResult;
}
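[editor's note] The llvm::all_of and llvm::is_contained helpers adopted above are thin range wrappers over the std:: algorithms; roughly, in standalone form with stand-in element types and predicates:

#include <algorithm>
#include <vector>

bool allIdentified(const std::vector<int> &Objects) {
  return std::all_of(Objects.begin(), Objects.end(),
                     [](int O) { return O != 0; }); // stand-in predicate
}

bool containsValue(const std::vector<int> &Objects, int V) {
  return std::find(Objects.begin(), Objects.end(), V) != Objects.end();
}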
@@ -937,9 +941,9 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
return Result;
}
-char GlobalsAA::PassID;
+AnalysisKey GlobalsAA::Key;
-GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> &AM) {
+GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) {
return GlobalsAAResult::analyzeModule(M,
AM.getResult<TargetLibraryAnalysis>(M),
AM.getResult<CallGraphAnalysis>(M));
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 43c0ba17fe4a..76e2561b9da3 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -34,9 +34,9 @@ using namespace llvm;
#define DEBUG_TYPE "iv-users"
-char IVUsersAnalysis::PassID;
+AnalysisKey IVUsersAnalysis::Key;
-IVUsers IVUsersAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
+IVUsers IVUsersAnalysis::run(Loop &L, LoopAnalysisManager &AM) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
@@ -47,7 +47,7 @@ IVUsers IVUsersAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
FAM.getCachedResult<ScalarEvolutionAnalysis>(*F));
}
-PreservedAnalyses IVUsersPrinterPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+PreservedAnalyses IVUsersPrinterPass::run(Loop &L, LoopAnalysisManager &AM) {
AM.getResult<IVUsersAnalysis>(L).print(OS);
return PreservedAnalyses::all();
}
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index dcb724abc02d..9b9faacd354c 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -40,18 +40,7 @@ using namespace llvm;
STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
-// Threshold to use when optsize is specified (and there is no
-// -inline-threshold).
-const int OptSizeThreshold = 75;
-
-// Threshold to use when -Oz is specified (and there is no -inline-threshold).
-const int OptMinSizeThreshold = 25;
-
-// Threshold to use when -O[34] is specified (and there is no
-// -inline-threshold).
-const int OptAggressiveThreshold = 275;
-
-static cl::opt<int> DefaultInlineThreshold(
+static cl::opt<int> InlineThreshold(
"inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -66,6 +55,11 @@ static cl::opt<int> ColdThreshold(
"inlinecold-threshold", cl::Hidden, cl::init(225),
cl::desc("Threshold for inlining functions with cold attribute"));
+static cl::opt<int>
+ HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
+ cl::ZeroOrMore,
+                         cl::desc("Threshold for hot callsites"));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -75,20 +69,23 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
- /// The cache of @llvm.assume intrinsics.
- AssumptionCacheTracker *ACT;
+ /// Getter for the cache of @llvm.assume intrinsics.
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
/// Profile summary information.
ProfileSummaryInfo *PSI;
- // The called function.
+ /// The called function.
Function &F;
- // The candidate callsite being analyzed. Please do not use this to do
- // analysis in the caller function; we want the inline cost query to be
- // easily cacheable. Instead, use the cover function paramHasAttr.
+ /// The candidate callsite being analyzed. Please do not use this to do
+ /// analysis in the caller function; we want the inline cost query to be
+ /// easily cacheable. Instead, use the cover function paramHasAttr.
CallSite CandidateCS;
+ /// Tunable parameters that control the analysis.
+ const InlineParams &Params;
+
int Threshold;
int Cost;
@@ -107,25 +104,25 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
int FiftyPercentVectorBonus, TenPercentVectorBonus;
int VectorBonus;
- // While we walk the potentially-inlined instructions, we build up and
- // maintain a mapping of simplified values specific to this callsite. The
- // idea is to propagate any special information we have about arguments to
- // this call through the inlinable section of the function, and account for
- // likely simplifications post-inlining. The most important aspect we track
- // is CFG altering simplifications -- when we prove a basic block dead, that
- // can cause dramatic shifts in the cost of inlining a function.
+ /// While we walk the potentially-inlined instructions, we build up and
+ /// maintain a mapping of simplified values specific to this callsite. The
+ /// idea is to propagate any special information we have about arguments to
+ /// this call through the inlinable section of the function, and account for
+ /// likely simplifications post-inlining. The most important aspect we track
+ /// is CFG altering simplifications -- when we prove a basic block dead, that
+ /// can cause dramatic shifts in the cost of inlining a function.
DenseMap<Value *, Constant *> SimplifiedValues;
- // Keep track of the values which map back (through function arguments) to
- // allocas on the caller stack which could be simplified through SROA.
+ /// Keep track of the values which map back (through function arguments) to
+ /// allocas on the caller stack which could be simplified through SROA.
DenseMap<Value *, Value *> SROAArgValues;
- // The mapping of caller Alloca values to their accumulated cost savings. If
- // we have to disable SROA for one of the allocas, this tells us how much
- // cost must be added.
+ /// The mapping of caller Alloca values to their accumulated cost savings. If
+ /// we have to disable SROA for one of the allocas, this tells us how much
+ /// cost must be added.
DenseMap<Value *, int> SROAArgCosts;
- // Keep track of values which map to a pointer base and constant offset.
+ /// Keep track of values which map to a pointer base and constant offset.
DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
// Custom simplification helper routines.
@@ -203,20 +200,21 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitUnreachableInst(UnreachableInst &I);
public:
- CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI, Function &Callee, int Threshold,
- CallSite CSArg)
- : TTI(TTI), ACT(ACT), PSI(PSI), F(Callee), CandidateCS(CSArg),
- Threshold(Threshold), Cost(0), IsCallerRecursive(false),
- IsRecursiveCall(false), ExposesReturnsTwice(false),
- HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
- HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),
- AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
- FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ CallAnalyzer(const TargetTransformInfo &TTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
+ const InlineParams &Params)
+ : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee),
+ CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
+ Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
+ ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -320,7 +318,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
continue;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
@@ -620,42 +618,47 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
}
Function *Caller = CS.getCaller();
- if (DefaultInlineThreshold.getNumOccurrences() > 0) {
- // Explicitly specified -inline-threhold overrides the threshold passed to
- // CallAnalyzer's constructor.
- Threshold = DefaultInlineThreshold;
- } else {
- // If -inline-threshold is not given, listen to the optsize and minsize
- // attributes when they would decrease the threshold.
- if (Caller->optForMinSize() && OptMinSizeThreshold < Threshold)
- Threshold = OptMinSizeThreshold;
- else if (Caller->optForSize() && OptSizeThreshold < Threshold)
- Threshold = OptSizeThreshold;
- }
+
+ // return min(A, B) if B is valid.
+ auto MinIfValid = [](int A, Optional<int> B) {
+ return B ? std::min(A, B.getValue()) : A;
+ };
+
+ // return max(A, B) if B is valid.
+ auto MaxIfValid = [](int A, Optional<int> B) {
+ return B ? std::max(A, B.getValue()) : A;
+ };
+
+ // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
+ // and reduce the threshold if the caller has the necessary attribute.
+ if (Caller->optForMinSize())
+ Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
+ else if (Caller->optForSize())
+ Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
bool HotCallsite = false;
uint64_t TotalWeight;
- if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
- PSI->isHotCount(TotalWeight))
+ if (PSI && CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
+ PSI->isHotCount(TotalWeight)) {
HotCallsite = true;
+ }
// Listen to the inlinehint attribute or profile based hotness information
// when it would increase the threshold and the caller does not need to
// minimize its size.
bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
- PSI->isHotFunction(&Callee) ||
- HotCallsite;
- if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
- Threshold = HintThreshold;
-
- bool ColdCallee = PSI->isColdFunction(&Callee);
- // Command line argument for DefaultInlineThreshold will override the default
- // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
- // do not use the default cold threshold even if it is smaller.
- if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
- ColdThreshold.getNumOccurrences() > 0) &&
- ColdCallee && ColdThreshold < Threshold)
- Threshold = ColdThreshold;
+ (PSI && PSI->isFunctionEntryHot(&Callee));
+ if (InlineHint && !Caller->optForMinSize())
+ Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+
+ if (HotCallsite && !Caller->optForMinSize())
+ Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
+
+ bool ColdCallee = PSI && PSI->isFunctionEntryCold(&Callee);
+ // For cold callees, use the ColdThreshold knob if it is available and reduces
+ // the threshold.
+ if (ColdCallee)
+ Threshold = MinIfValid(Threshold, Params.ColdThreshold);
// Finally, take the target-specific inlining threshold multiplier into
// account.
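[editor's note] A standalone sketch of the Optional-aware clamping introduced above (std::optional in place of llvm::Optional): an unset knob leaves the threshold untouched, while a set knob can only tighten it (MaxIfValid is the mirror image for raising it).

#include <algorithm>
#include <optional>

static int minIfValid(int A, std::optional<int> B) {
  return B ? std::min(A, *B) : A;
}
// minIfValid(225, std::nullopt) == 225; minIfValid(225, 75) == 75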
@@ -957,8 +960,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(TTI, ACT, PSI, *F, InlineConstants::IndirectCallThreshold,
- CS);
+ auto IndirectCallParams = Params;
+ IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
+ CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -1251,13 +1255,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Cost -= InlineConstants::InstrCost;
}
}
-
+ // The call instruction also disappears after inlining.
+ Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty;
+
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically.
bool OnlyOneCallAndLocalLinkage =
F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
if (OnlyOneCallAndLocalLinkage)
- Cost += InlineConstants::LastCallToStaticBonus;
+ Cost -= InlineConstants::LastCallToStaticBonus;
// If this function uses the coldcc calling convention, prefer not to inline
// it.
@@ -1312,8 +1318,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// the ephemeral values multiple times (and they're completely determined by
// the callee, so this is purely duplicate work).
SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F),
- EphValues);
+ CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
@@ -1444,32 +1449,19 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
-InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI) {
- return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
- ACT, PSI);
-}
-
-int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
- unsigned SizeOptLevel) {
- if (OptLevel > 2)
- return OptAggressiveThreshold;
- if (SizeOptLevel == 1) // -Os
- return OptSizeThreshold;
- if (SizeOptLevel == 2) // -Oz
- return OptMinSizeThreshold;
- return DefaultInlineThreshold;
+InlineCost llvm::getInlineCost(
+ CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI) {
+ return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
+ GetAssumptionCache, PSI);
}
-int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
-
-InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
- int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI) {
+InlineCost llvm::getInlineCost(
+ CallSite CS, Function *Callee, const InlineParams &Params,
+ TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI) {
// Cannot inline indirect calls.
if (!Callee)
@@ -1494,7 +1486,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
// Don't inline functions which can be interposed at link-time. Don't inline
// functions marked noinline or call sites marked noinline.
- // Note: inlining non-exact non-interposable fucntions is fine, since we know
+ // Note: inlining non-exact non-interposable functions is fine, since we know
// we have *a* correct implementation of the source level function.
if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
CS.isNoInline())
@@ -1503,7 +1495,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(CalleeTTI, ACT, PSI, *Callee, DefaultThreshold, CS);
+ CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1551,3 +1543,67 @@ bool llvm::isInlineViable(Function &F) {
return true;
}
+
+// APIs to create InlineParams based on command line flags and/or other
+// parameters.
+
+InlineParams llvm::getInlineParams(int Threshold) {
+ InlineParams Params;
+
+ // This field is the threshold to use for a callee by default. This is
+ // derived from one or more of:
+ // * optimization or size-optimization levels,
+ // * a value passed to createFunctionInliningPass function, or
+ // * the -inline-threshold flag.
+ // If the -inline-threshold flag is explicitly specified, that is used
+ // irrespective of anything else.
+ if (InlineThreshold.getNumOccurrences() > 0)
+ Params.DefaultThreshold = InlineThreshold;
+ else
+ Params.DefaultThreshold = Threshold;
+
+ // Set the HintThreshold knob from the -inlinehint-threshold.
+ Params.HintThreshold = HintThreshold;
+
+ // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
+ Params.HotCallSiteThreshold = HotCallSiteThreshold;
+
+ // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
+ // -inline-threshold commandline option is not explicitly given. If that
+ // option is present, then its value applies even for callees with size and
+ // minsize attributes.
+ // If the -inline-threshold is not specified, set the ColdThreshold from the
+ // -inlinecold-threshold even if it is not explicitly passed. If
+ // -inline-threshold is specified, then -inlinecold-threshold needs to be
+ // explicitly specified to set the ColdThreshold knob.
+ if (InlineThreshold.getNumOccurrences() == 0) {
+ Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
+ Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
+ Params.ColdThreshold = ColdThreshold;
+ } else if (ColdThreshold.getNumOccurrences() > 0) {
+ Params.ColdThreshold = ColdThreshold;
+ }
+ return Params;
+}
+
+InlineParams llvm::getInlineParams() {
+ return getInlineParams(InlineThreshold);
+}
+
+// Compute the default threshold for inlining based on the opt level and the
+// size opt level.
+static int computeThresholdFromOptLevels(unsigned OptLevel,
+ unsigned SizeOptLevel) {
+ if (OptLevel > 2)
+ return InlineConstants::OptAggressiveThreshold;
+ if (SizeOptLevel == 1) // -Os
+ return InlineConstants::OptSizeThreshold;
+ if (SizeOptLevel == 2) // -Oz
+ return InlineConstants::OptMinSizeThreshold;
+ return InlineThreshold;
+}
+
+InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
+ return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+}
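The raw-threshold entry points above are replaced by InlineParams-based overloads. A minimal caller sketch (not part of this patch; everything except getInlineParams/getInlineCost is an assumed name) of how the new API is meant to be driven:

    // Hypothetical glue code, sketched against the signatures added above.
    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/Analysis/ProfileSummaryInfo.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/CallSite.h"
    #include <functional>

    using namespace llvm;

    static InlineCost costForCall(
        CallSite CS, TargetTransformInfo &CalleeTTI,
        std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
        ProfileSummaryInfo *PSI) {
      // Thresholds derived from -O2 with no size optimization; an explicit
      // -inline-threshold flag still overrides the computed default.
      InlineParams Params = getInlineParams(/*OptLevel=*/2, /*SizeOptLevel=*/0);
      return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
    }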
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index aeaf9388579c..b4686a1ff175 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -67,9 +67,12 @@ static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
const Query &, unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
unsigned);
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse);
static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
-static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
+static Value *SimplifyCastInst(unsigned, Value *, Type *,
+ const Query &, unsigned);
/// For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
@@ -679,9 +682,26 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
- // 0 - X -> 0 if the sub is NUW.
- if (isNUW && match(Op0, m_Zero()))
- return Op0;
+ // Is this a negation?
+ if (match(Op0, m_Zero())) {
+ // 0 - X -> 0 if the sub is NUW.
+ if (isNUW)
+ return Op0;
+
+ unsigned BitWidth = Op1->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (KnownZero == ~APInt::getSignBit(BitWidth)) {
+ // Op1 is either 0 or the minimum signed value. If the sub is NSW, then
+ // Op1 must be 0 because negating the minimum signed value is undefined.
+ if (isNSW)
+ return Op0;
+
+ // 0 - X -> X if X is 0 or the minimum signed value.
+ return Op1;
+ }
+ }
// (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
// For example, (X + Y) - Y -> X; (Y + X) - Y -> X
@@ -747,7 +767,8 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// See if "V === X - Y" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
// It does! Now see if "trunc V" simplifies.
- if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1))
+ if (Value *W = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(),
+ Q, MaxRecurse - 1))
// It does, return the simplified "trunc V".
return W;
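The new negation case in SimplifySubInst leans on a two's complement identity: when every bit except the sign bit is known zero, X is either 0 or the minimum signed value, and wrapping negation maps both to themselves. A standalone check of that fact (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // The two values the fold accepts: KnownZero == ~SignBit.
      for (int64_t X : {static_cast<int64_t>(INT32_MIN), int64_t(0)}) {
        // Modular negation at 32 bits, as LLVM's 'sub' computes it.
        uint32_t Neg = uint32_t(0) - static_cast<uint32_t>(X);
        assert(Neg == static_cast<uint32_t>(X)); // 0 - X == X for these inputs
      }
      return 0;
    }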
@@ -1106,6 +1127,10 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (match(Op1, m_Undef()))
return Op1;
+ // X / 1.0 -> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
+
// 0 / X -> 0
// Requires that NaNs are off (X could be zero) and signed zeroes are
// ignored (X could be positive or negative, so the output sign is unknown).
@@ -1497,17 +1522,45 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
return nullptr;
}
-static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
- Type *ITy = Op0->getType();
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
- ConstantInt *CI1, *CI2;
- Value *V;
+ Value *A, *B;
+ if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
+ !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
+ return nullptr;
+
+ // We have (icmp Pred0, A, B) & (icmp Pred1, A, B).
+ // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
+ // can eliminate Op1 from this 'and'.
+ if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
+ return Op0;
+ // Check for any combination of predicates that are guaranteed to be disjoint.
+ if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_EQ && ICmpInst::isFalseWhenEqual(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT) ||
+ (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT))
+ return getFalse(Op0->getType());
+
+ return nullptr;
+}
+
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
return X;
+ if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
+ return X;
+
// Look for this pattern: (icmp V, C0) & (icmp V, C1).
+ Type *ITy = Op0->getType();
+ ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
+ Value *V;
if (match(Op0, m_ICmp(Pred0, m_Value(V), m_APInt(C0))) &&
match(Op1, m_ICmp(Pred1, m_Specific(V), m_APInt(C1)))) {
// Make a constant range that's the intersection of the two icmp ranges.
@@ -1518,21 +1571,22 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return getFalse(ITy);
}
- if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
- m_ConstantInt(CI2))))
+ // (icmp (add V, C0), C1) & (icmp V, C0)
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
return nullptr;
- if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
return nullptr;
auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ if (AddInst->getOperand(1) != Op1->getOperand(1))
+ return nullptr;
+
bool isNSW = AddInst->hasNoSignedWrap();
bool isNUW = AddInst->hasNoUnsignedWrap();
- const APInt &CI1V = CI1->getValue();
- const APInt &CI2V = CI2->getValue();
- const APInt Delta = CI2V - CI1V;
- if (CI1V.isStrictlyPositive()) {
+ const APInt Delta = *C1 - *C0;
+ if (C0->isStrictlyPositive()) {
if (Delta == 2) {
if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT)
return getFalse(ITy);
@@ -1546,7 +1600,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return getFalse(ITy);
}
}
- if (CI1V.getBoolValue() && isNUW) {
+ if (C0->getBoolValue() && isNUW) {
if (Delta == 2)
if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT)
return getFalse(ITy);
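The same-operand helper above folds an 'and' of two icmps over identical operands by predicate logic alone: if the predicates are disjoint, the result is constant false. A brute-force illustration of one such pair (illustrative only, not part of the patch):

    #include <cassert>

    int main() {
      for (int A = -4; A <= 4; ++A)
        for (int B = -4; B <= 4; ++B)
          // (icmp slt A, B) & (icmp sgt A, B) can never both hold.
          assert(!(A < B && A > B));
      return 0;
    }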
@@ -1680,33 +1734,61 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
-/// contains all possible values.
-static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
- ConstantInt *CI1, *CI2;
- Value *V;
+ Value *A, *B;
+ if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
+ !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
+ return nullptr;
+ // We have (icmp Pred0, A, B) | (icmp Pred1, A, B).
+ // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
+ // can eliminate Op0 from this 'or'.
+ if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
+ return Op1;
+
+ // Check for any combination of predicates that cover the entire range of
+ // possibilities.
+ if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) ||
+ (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE))
+ return getTrue(Op0->getType());
+
+ return nullptr;
+}
+
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
return X;
- if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
- m_ConstantInt(CI2))))
- return nullptr;
+ if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
+ return X;
- if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ // (icmp (add V, C0), C1) | (icmp V, C0)
+ ICmpInst::Predicate Pred0, Pred1;
+ const APInt *C0, *C1;
+ Value *V;
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
return nullptr;
- Type *ITy = Op0->getType();
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
+ return nullptr;
auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ if (AddInst->getOperand(1) != Op1->getOperand(1))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
bool isNSW = AddInst->hasNoSignedWrap();
bool isNUW = AddInst->hasNoUnsignedWrap();
- const APInt &CI1V = CI1->getValue();
- const APInt &CI2V = CI2->getValue();
- const APInt Delta = CI2V - CI1V;
- if (CI1V.isStrictlyPositive()) {
+ const APInt Delta = *C1 - *C0;
+ if (C0->isStrictlyPositive()) {
if (Delta == 2) {
if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
return getTrue(ITy);
@@ -1720,7 +1802,7 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return getTrue(ITy);
}
}
- if (CI1V.getBoolValue() && isNUW) {
+ if (C0->getBoolValue() && isNUW) {
if (Delta == 2)
if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
return getTrue(ITy);
@@ -2102,8 +2184,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
GetUnderlyingObjects(RHS, RHSUObjs, DL);
// Is the set of underlying objects all noalias calls?
- auto IsNAC = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall);
+ auto IsNAC = [](ArrayRef<Value *> Objects) {
+ return all_of(Objects, isNoAliasCall);
};
// Is the set of underlying objects all things which must be disjoint from
@@ -2112,8 +2194,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
// live with the compared-to allocation). For globals, we exclude symbols
// that might be resolved lazily to symbols in another dynamically-loaded
// library (and, thus, could be malloc'ed by the implementation).
- auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(), [](Value *V) {
+ auto IsAllocDisjoint = [](ArrayRef<Value *> Objects) {
+ return all_of(Objects, [](Value *V) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
@@ -2150,470 +2232,275 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
return nullptr;
}
-/// Given operands for an ICmpInst, see if we can fold the result.
-/// If not, this returns null.
-static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
- CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
- assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
-
- if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
-
- // If we have a constant, make sure it is on the RHS.
- std::swap(LHS, RHS);
- Pred = CmpInst::getSwappedPredicate(Pred);
- }
-
+/// Fold an icmp when its operands have i1 scalar type.
+static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q) {
Type *ITy = GetCompareTy(LHS); // The return type.
Type *OpTy = LHS->getType(); // The operand type.
+ if (!OpTy->getScalarType()->isIntegerTy(1))
+ return nullptr;
- // icmp X, X -> true/false
- // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
- // because X could be 0.
- if (LHS == RHS || isa<UndefValue>(RHS))
- return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-
- // Special case logic when the operands have i1 type.
- if (OpTy->getScalarType()->isIntegerTy(1)) {
- switch (Pred) {
- default: break;
- case ICmpInst::ICMP_EQ:
- // X == 1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
- case ICmpInst::ICMP_NE:
- // X != 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_UGT:
- // X >u 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_UGE: {
- // X >=u 1 -> X
- if (match(RHS, m_One()))
- return LHS;
- if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
- return getTrue(ITy);
- break;
- }
- case ICmpInst::ICMP_SGE: {
- /// For signed comparison, the values for an i1 are 0 and -1
- /// respectively. This maps into a truth table of:
- /// LHS | RHS | LHS >=s RHS | LHS implies RHS
- /// 0 | 0 | 1 (0 >= 0) | 1
- /// 0 | 1 | 1 (0 >= -1) | 1
- /// 1 | 0 | 0 (-1 >= 0) | 0
- /// 1 | 1 | 1 (-1 >= -1) | 1
- if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
- return getTrue(ITy);
- break;
- }
- case ICmpInst::ICMP_SLT:
- // X <s 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_SLE:
- // X <=s -1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
- case ICmpInst::ICMP_ULE: {
- if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
- return getTrue(ITy);
- break;
- }
- }
- }
-
- // If we are comparing with zero then try hard since this is a common case.
- if (match(RHS, m_Zero())) {
- bool LHSKnownNonNegative, LHSKnownNegative;
- switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
- case ICmpInst::ICMP_ULT:
- return getFalse(ITy);
- case ICmpInst::ICMP_UGE:
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_EQ:
+ // X == 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_NE:
+ // X != 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
return getTrue(ITy);
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getFalse(ITy);
- break;
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getTrue(ITy);
- break;
- case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getTrue(ITy);
- if (LHSKnownNonNegative)
- return getFalse(ITy);
- break;
- case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getTrue(ITy);
- if (LHSKnownNonNegative &&
- isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getFalse(ITy);
- break;
- case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getFalse(ITy);
- if (LHSKnownNonNegative)
- return getTrue(ITy);
- break;
- case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getFalse(ITy);
- if (LHSKnownNonNegative &&
- isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getTrue(ITy);
- break;
- }
+ break;
+ case ICmpInst::ICMP_SGE:
+ /// For signed comparison, the values for an i1 are 0 and -1
+ /// respectively. This maps into a truth table of:
+ /// LHS | RHS | LHS >=s RHS | LHS implies RHS
+ /// 0 | 0 | 1 (0 >= 0) | 1
+ /// 0 | 1 | 1 (0 >= -1) | 1
+ /// 1 | 0 | 0 (-1 >= 0) | 0
+ /// 1 | 1 | 1 (-1 >= -1) | 1
+ if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ // X <s 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <=s -1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+ return getTrue(ITy);
+ break;
}
- // See if we are doing a comparison with a constant integer.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Rule out tautological comparisons (eg., ult 0 or uge 0).
- ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
- if (RHS_CR.isEmptySet())
- return ConstantInt::getFalse(CI->getContext());
- if (RHS_CR.isFullSet())
- return ConstantInt::getTrue(CI->getContext());
-
- // Many binary operators with constant RHS have easy to compute constant
- // range. Use them to check whether the comparison is a tautology.
- unsigned Width = CI->getBitWidth();
- APInt Lower = APInt(Width, 0);
- APInt Upper = APInt(Width, 0);
- ConstantInt *CI2;
- if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
- // 'urem x, CI2' produces [0, CI2).
- Upper = CI2->getValue();
- } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
- // 'srem x, CI2' produces (-|CI2|, |CI2|).
- Upper = CI2->getValue().abs();
- Lower = (-Upper) + 1;
- } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) {
- // 'udiv CI2, x' produces [0, CI2].
- Upper = CI2->getValue() + 1;
- } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
- // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (!CI2->isZero())
- Upper = NegOne.udiv(CI2->getValue()) + 1;
- } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) {
- if (CI2->isMinSignedValue()) {
- // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
- Lower = CI2->getValue();
- Upper = Lower.lshr(1) + 1;
- } else {
- // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
- Upper = CI2->getValue().abs() + 1;
- Lower = (-Upper) + 1;
- }
- } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- const APInt &Val = CI2->getValue();
- if (Val.isAllOnesValue()) {
- // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
- // where CI2 != -1 and CI2 != 0 and CI2 != 1
- Lower = IntMin + 1;
- Upper = IntMax + 1;
- } else if (Val.countLeadingZeros() < Width - 1) {
- // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]
- // where CI2 != -1 and CI2 != 0 and CI2 != 1
- Lower = IntMin.sdiv(Val);
- Upper = IntMax.sdiv(Val);
- if (Lower.sgt(Upper))
- std::swap(Lower, Upper);
- Upper = Upper + 1;
- assert(Upper != Lower && "Upper part of range has wrapped!");
- }
- } else if (match(LHS, m_NUWShl(m_ConstantInt(CI2), m_Value()))) {
- // 'shl nuw CI2, x' produces [CI2, CI2 << CLZ(CI2)]
- Lower = CI2->getValue();
- Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
- } else if (match(LHS, m_NSWShl(m_ConstantInt(CI2), m_Value()))) {
- if (CI2->isNegative()) {
- // 'shl nsw CI2, x' produces [CI2 << CLO(CI2)-1, CI2]
- unsigned ShiftAmount = CI2->getValue().countLeadingOnes() - 1;
- Lower = CI2->getValue().shl(ShiftAmount);
- Upper = CI2->getValue() + 1;
- } else {
- // 'shl nsw CI2, x' produces [CI2, CI2 << CLZ(CI2)-1]
- unsigned ShiftAmount = CI2->getValue().countLeadingZeros() - 1;
- Lower = CI2->getValue();
- Upper = CI2->getValue().shl(ShiftAmount) + 1;
- }
- } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
- // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (CI2->getValue().ult(Width))
- Upper = NegOne.lshr(CI2->getValue()) + 1;
- } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) {
- // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2].
- unsigned ShiftAmount = Width - 1;
- if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = CI2->getValue().countTrailingZeros();
- Lower = CI2->getValue().lshr(ShiftAmount);
- Upper = CI2->getValue() + 1;
- } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
- // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- if (CI2->getValue().ult(Width)) {
- Lower = IntMin.ashr(CI2->getValue());
- Upper = IntMax.ashr(CI2->getValue()) + 1;
- }
- } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) {
- unsigned ShiftAmount = Width - 1;
- if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = CI2->getValue().countTrailingZeros();
- if (CI2->isNegative()) {
- // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)]
- Lower = CI2->getValue();
- Upper = CI2->getValue().ashr(ShiftAmount) + 1;
- } else {
- // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2]
- Lower = CI2->getValue().ashr(ShiftAmount);
- Upper = CI2->getValue() + 1;
- }
- } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
- // 'or x, CI2' produces [CI2, UINT_MAX].
- Lower = CI2->getValue();
- } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
- // 'and x, CI2' produces [0, CI2].
- Upper = CI2->getValue() + 1;
- } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) {
- // 'add nuw x, CI2' produces [CI2, UINT_MAX].
- Lower = CI2->getValue();
- }
-
- ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper)
- : ConstantRange(Width, true);
+ return nullptr;
+}
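The i1 folds above follow from the two-value domain: with only 0 and 1 (or 0 and -1 under sign extension), each comparison collapses to the operand itself or a constant. A brute-force check of three of them (illustrative only, not part of the patch):

    #include <cassert>

    int main() {
      for (int Bit = 0; Bit <= 1; ++Bit) {
        bool X = Bit != 0;
        int Signed = X ? -1 : 0;   // sign-extended value of an i1
        assert((X == true) == X);  // icmp eq X, 1  -> X
        assert((X != false) == X); // icmp ne X, 0  -> X
        assert((Signed < 0) == X); // icmp slt X, 0 -> X
      }
      return 0;
    }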
- if (auto *I = dyn_cast<Instruction>(LHS))
- if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
- LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
+/// Try hard to fold icmp with zero RHS because this is a common case.
+static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q) {
+ if (!match(RHS, m_Zero()))
+ return nullptr;
- if (!LHS_CR.isFullSet()) {
- if (RHS_CR.contains(LHS_CR))
- return ConstantInt::getTrue(RHS->getContext());
- if (RHS_CR.inverse().contains(LHS_CR))
- return ConstantInt::getFalse(RHS->getContext());
- }
+ Type *ITy = GetCompareTy(LHS); // The return type.
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_UGE:
+ return getTrue(ITy);
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative)
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative)
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SGT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getTrue(ITy);
+ break;
}
- // If both operands have range metadata, use the metadata
- // to simplify the comparison.
- if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
- auto RHS_Instr = dyn_cast<Instruction>(RHS);
- auto LHS_Instr = dyn_cast<Instruction>(LHS);
-
- if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
- LHS_Instr->getMetadata(LLVMContext::MD_range)) {
- auto RHS_CR = getConstantRangeFromMetadata(
- *RHS_Instr->getMetadata(LLVMContext::MD_range));
- auto LHS_CR = getConstantRangeFromMetadata(
- *LHS_Instr->getMetadata(LLVMContext::MD_range));
+ return nullptr;
+}
- auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR);
- if (Satisfied_CR.contains(LHS_CR))
- return ConstantInt::getTrue(RHS->getContext());
+static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
+ const APInt *C;
+ if (!match(RHS, m_APInt(C)))
+ return nullptr;
- auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion(
- CmpInst::getInversePredicate(Pred), RHS_CR);
- if (InversedSatisfied_CR.contains(LHS_CR))
- return ConstantInt::getFalse(RHS->getContext());
+ // Rule out tautological comparisons (e.g., ult 0 or uge 0).
+ ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C);
+ if (RHS_CR.isEmptySet())
+ return ConstantInt::getFalse(GetCompareTy(RHS));
+ if (RHS_CR.isFullSet())
+ return ConstantInt::getTrue(GetCompareTy(RHS));
+
+ // Many binary operators with constant RHS have easy to compute constant
+ // range. Use them to check whether the comparison is a tautology.
+ unsigned Width = C->getBitWidth();
+ APInt Lower = APInt(Width, 0);
+ APInt Upper = APInt(Width, 0);
+ const APInt *C2;
+ if (match(LHS, m_URem(m_Value(), m_APInt(C2)))) {
+ // 'urem x, C2' produces [0, C2).
+ Upper = *C2;
+ } else if (match(LHS, m_SRem(m_Value(), m_APInt(C2)))) {
+ // 'srem x, C2' produces (-|C2|, |C2|).
+ Upper = C2->abs();
+ Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_APInt(C2), m_Value()))) {
+ // 'udiv C2, x' produces [0, C2].
+ Upper = *C2 + 1;
+ } else if (match(LHS, m_UDiv(m_Value(), m_APInt(C2)))) {
+ // 'udiv x, C2' produces [0, UINT_MAX / C2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (*C2 != 0)
+ Upper = NegOne.udiv(*C2) + 1;
+ } else if (match(LHS, m_SDiv(m_APInt(C2), m_Value()))) {
+ if (C2->isMinSignedValue()) {
+ // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+ Lower = *C2;
+ Upper = Lower.lshr(1) + 1;
+ } else {
+ // 'sdiv C2, x' produces [-|C2|, |C2|].
+ Upper = C2->abs() + 1;
+ Lower = (-Upper) + 1;
}
- }
-
- // Compare of cast, for example (zext X) != 0 -> X != 0
- if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
- Instruction *LI = cast<CastInst>(LHS);
- Value *SrcOp = LI->getOperand(0);
- Type *SrcTy = SrcOp->getType();
- Type *DstTy = LI->getType();
-
- // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
- // if the integer type is the same size as the pointer type.
- if (MaxRecurse && isa<PtrToIntInst>(LI) &&
- Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- // Transfer the cast to the constant.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp,
- ConstantExpr::getIntToPtr(RHSC, SrcTy),
- Q, MaxRecurse-1))
- return V;
- } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
- if (RI->getOperand(0)->getType() == SrcTy)
- // Compare without the cast.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- Q, MaxRecurse-1))
- return V;
- }
+ } else if (match(LHS, m_SDiv(m_Value(), m_APInt(C2)))) {
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C2->isAllOnesValue()) {
+ // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
+ // where C2 != -1 and C2 != 0 and C2 != 1
+ Lower = IntMin + 1;
+ Upper = IntMax + 1;
+ } else if (C2->countLeadingZeros() < Width - 1) {
+ // 'sdiv x, C2' produces [INT_MIN / C2, INT_MAX / C2]
+ // where C2 != -1 and C2 != 0 and C2 != 1
+ Lower = IntMin.sdiv(*C2);
+ Upper = IntMax.sdiv(*C2);
+ if (Lower.sgt(Upper))
+ std::swap(Lower, Upper);
+ Upper = Upper + 1;
+ assert(Upper != Lower && "Upper part of range has wrapped!");
}
-
- if (isa<ZExtInst>(LHS)) {
- // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
- // same type.
- if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
- if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
- // Compare X and Y. Note that signed predicates become unsigned.
- if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, RI->getOperand(0), Q,
- MaxRecurse-1))
- return V;
- }
- // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
- // too. If not, then try to deduce the result of the comparison.
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Compute the constant that would happen if we truncated to SrcTy then
- // reextended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
- Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
-
- // If the re-extended constant didn't change then this is effectively
- // also a case of comparing two zero-extended values.
- if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, Trunc, Q, MaxRecurse-1))
- return V;
-
- // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
- // there. Use this to work out the result of the comparison.
- if (RExt != CI) {
- switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
- // LHS <u RHS.
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- return ConstantInt::getFalse(CI->getContext());
-
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- return ConstantInt::getTrue(CI->getContext());
-
- // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS
- // is non-negative then LHS <s RHS.
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative() ?
- ConstantInt::getTrue(CI->getContext()) :
- ConstantInt::getFalse(CI->getContext());
-
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative() ?
- ConstantInt::getFalse(CI->getContext()) :
- ConstantInt::getTrue(CI->getContext());
- }
- }
- }
+ } else if (match(LHS, m_NUWShl(m_APInt(C2), m_Value()))) {
+ // 'shl nuw C2, x' produces [C2, C2 << CLZ(C2)]
+ Lower = *C2;
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (match(LHS, m_NSWShl(m_APInt(C2), m_Value()))) {
+ if (C2->isNegative()) {
+ // 'shl nsw C2, x' produces [C2 << CLO(C2)-1, C2]
+ unsigned ShiftAmount = C2->countLeadingOnes() - 1;
+ Lower = C2->shl(ShiftAmount);
+ Upper = *C2 + 1;
+ } else {
+ // 'shl nsw C2, x' produces [C2, C2 << CLZ(C2)-1]
+ unsigned ShiftAmount = C2->countLeadingZeros() - 1;
+ Lower = *C2;
+ Upper = C2->shl(ShiftAmount) + 1;
}
+ } else if (match(LHS, m_LShr(m_Value(), m_APInt(C2)))) {
+ // 'lshr x, C2' produces [0, UINT_MAX >> C2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (C2->ult(Width))
+ Upper = NegOne.lshr(*C2) + 1;
+ } else if (match(LHS, m_LShr(m_APInt(C2), m_Value()))) {
+ // 'lshr C2, x' produces [C2 >> (Width-1), C2].
+ unsigned ShiftAmount = Width - 1;
+ if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = C2->countTrailingZeros();
+ Lower = C2->lshr(ShiftAmount);
+ Upper = *C2 + 1;
+ } else if (match(LHS, m_AShr(m_Value(), m_APInt(C2)))) {
+ // 'ashr x, C2' produces [INT_MIN >> C2, INT_MAX >> C2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C2->ult(Width)) {
+ Lower = IntMin.ashr(*C2);
+ Upper = IntMax.ashr(*C2) + 1;
+ }
+ } else if (match(LHS, m_AShr(m_APInt(C2), m_Value()))) {
+ unsigned ShiftAmount = Width - 1;
+ if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = C2->countTrailingZeros();
+ if (C2->isNegative()) {
+ // 'ashr C2, x' produces [C2, C2 >> (Width-1)]
+ Lower = *C2;
+ Upper = C2->ashr(ShiftAmount) + 1;
+ } else {
+ // 'ashr C2, x' produces [C2 >> (Width-1), C2]
+ Lower = C2->ashr(ShiftAmount);
+ Upper = *C2 + 1;
+ }
+ } else if (match(LHS, m_Or(m_Value(), m_APInt(C2)))) {
+ // 'or x, C2' produces [C2, UINT_MAX].
+ Lower = *C2;
+ } else if (match(LHS, m_And(m_Value(), m_APInt(C2)))) {
+ // 'and x, C2' produces [0, C2].
+ Upper = *C2 + 1;
+ } else if (match(LHS, m_NUWAdd(m_Value(), m_APInt(C2)))) {
+ // 'add nuw x, C2' produces [C2, UINT_MAX].
+ Lower = *C2;
+ }
- if (isa<SExtInst>(LHS)) {
- // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
- // same type.
- if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
- if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
- // Compare X and Y. Note that the predicate does not change.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- Q, MaxRecurse-1))
- return V;
- }
- // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
- // too. If not, then try to deduce the result of the comparison.
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Compute the constant that would happen if we truncated to SrcTy then
- // reextended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
- Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
-
- // If the re-extended constant didn't change then this is effectively
- // also a case of comparing two sign-extended values.
- if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
- return V;
-
- // Otherwise the upper bits of LHS are all equal, while RHS has varying
- // bits there. Use this to work out the result of the comparison.
- if (RExt != CI) {
- switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
- case ICmpInst::ICMP_EQ:
- return ConstantInt::getFalse(CI->getContext());
- case ICmpInst::ICMP_NE:
- return ConstantInt::getTrue(CI->getContext());
+ ConstantRange LHS_CR =
+ Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
- // If RHS is non-negative then LHS <s RHS. If RHS is negative then
- // LHS >s RHS.
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative() ?
- ConstantInt::getTrue(CI->getContext()) :
- ConstantInt::getFalse(CI->getContext());
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative() ?
- ConstantInt::getFalse(CI->getContext()) :
- ConstantInt::getTrue(CI->getContext());
+ if (auto *I = dyn_cast<Instruction>(LHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
- // If LHS is non-negative then LHS <u RHS. If LHS is negative then
- // LHS >u RHS.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- // Comparison is true iff the LHS <s 0.
- if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
- Constant::getNullValue(SrcTy),
- Q, MaxRecurse-1))
- return V;
- break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- // Comparison is true iff the LHS >=s 0.
- if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
- Constant::getNullValue(SrcTy),
- Q, MaxRecurse-1))
- return V;
- break;
- }
- }
- }
- }
+ if (!LHS_CR.isFullSet()) {
+ if (RHS_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(GetCompareTy(RHS));
+ if (RHS_CR.inverse().contains(LHS_CR))
+ return ConstantInt::getFalse(GetCompareTy(RHS));
}
- // icmp eq|ne X, Y -> false|true if X != Y
- if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
- isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
- LLVMContext &Ctx = LHS->getType()->getContext();
- return Pred == ICmpInst::ICMP_NE ?
- ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
- }
+ return nullptr;
+}
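simplifyICmpWithConstant derives a constant range for the LHS from its defining operation and folds the compare when that range falls entirely inside (or outside) the region satisfying the predicate. For instance, 'urem x, 10' always lies in [0, 10), so 'icmp ult (urem x, 10), 10' is a tautology. A brute-force spot check (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 1000; ++X)
        assert(X % 10 < 10); // [0,10) is contained in the 'ult 10' region
      return 0;
    }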
+
+static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
+ Type *ITy = GetCompareTy(LHS); // The return type.
- // Special logic for binary operators.
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
if (MaxRecurse && (LBO || RBO)) {
@@ -2622,35 +2509,39 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
if (LBO && LBO->getOpcode() == Instruction::Add) {
- A = LBO->getOperand(0); B = LBO->getOperand(1);
- NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ A = LBO->getOperand(0);
+ B = LBO->getOperand(1);
+ NoLHSWrapProblem =
+ ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
}
if (RBO && RBO->getOpcode() == Instruction::Add) {
- C = RBO->getOperand(0); D = RBO->getOperand(1);
- NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ C = RBO->getOperand(0);
+ D = RBO->getOperand(1);
+ NoRHSWrapProblem =
+ ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
}
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
- Constant::getNullValue(RHS->getType()),
- Q, MaxRecurse-1))
+ Constant::getNullValue(RHS->getType()), Q,
+ MaxRecurse - 1))
return V;
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
- if (Value *V = SimplifyICmpInst(Pred,
- Constant::getNullValue(LHS->getType()),
- C == LHS ? D : C, Q, MaxRecurse-1))
+ if (Value *V =
+ SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()),
+ C == LHS ? D : C, Q, MaxRecurse - 1))
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
- if (A && C && (A == C || A == D || B == C || B == D) &&
- NoLHSWrapProblem && NoRHSWrapProblem) {
+ if (A && C && (A == C || A == D || B == C || B == D) && NoLHSWrapProblem &&
+ NoRHSWrapProblem) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y, *Z;
if (A == C) {
@@ -2671,7 +2562,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Y = A;
Z = C;
}
- if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1))
return V;
}
}
@@ -2771,7 +2662,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
@@ -2782,7 +2673,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2802,7 +2693,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
@@ -2813,7 +2704,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2832,6 +2723,17 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
}
+ // x >=u x >> y
+ // x >=u x udiv y.
+ if (RBO && (match(RBO, m_LShr(m_Specific(LHS), m_Value())) ||
+ match(RBO, m_UDiv(m_Specific(LHS), m_Value())))) {
+ // icmp pred X, (X op Y)
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+ }
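The new fold above uses the fact that unsigned right shift and unsigned division never increase the dividend, so 'x ult (x >> y)' is always false and 'x uge (x >> y)' always true. A spot check over a small domain (illustrative only; the loop skips the poison shift-amount and divide-by-zero cases):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 64; ++X)
        for (uint32_t Y = 1; Y < 8; ++Y) {
          assert(X >= (X >> Y)); // icmp uge X, (lshr X, Y) -> true
          assert(X >= (X / Y));  // icmp uge X, (udiv X, Y) -> true
        }
      return 0;
    }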
+
// handle:
// CI2 << X == CI
// CI2 << X != CI
@@ -2870,18 +2772,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
LBO->getOperand(1) == RBO->getOperand(1)) {
switch (LBO->getOpcode()) {
- default: break;
+ default:
+ break;
case Instruction::UDiv:
case Instruction::LShr:
if (ICmpInst::isSigned(Pred))
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case Instruction::SDiv:
case Instruction::AShr:
if (!LBO->isExact() || !RBO->isExact())
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), Q, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::Shl: {
@@ -2892,40 +2795,51 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!NSW && ICmpInst::isSigned(Pred))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), Q, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
}
}
}
+ return nullptr;
+}
- // Simplify comparisons involving max/min.
+/// Simplify integer comparisons where at least one operand of the compare
+/// matches an integer min/max idiom.
+static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
+ Type *ITy = GetCompareTy(LHS); // The return type.
Value *A, *B;
CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
// Signed variants on "max(a,b)>=a -> true".
if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // smax(A, B) pred A.
EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
// We analyze this as smax(A, B) pred A.
P = Pred;
} else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred smax(A, B).
EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
// We analyze this as smax(A, B) swapped-pred A.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
(A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // smin(A, B) pred A.
EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
// We analyze this as smax(-A, -B) swapped-pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred smin(A, B).
EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
// We analyze this as smax(-A, -B) pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
@@ -2946,7 +2860,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2960,7 +2874,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
@@ -2976,26 +2890,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Unsigned variants on "max(a,b)>=a -> true".
P = CmpInst::BAD_ICMP_PREDICATE;
if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // umax(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // umax(A, B) pred A.
EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
// We analyze this as umax(A, B) pred A.
P = Pred;
} else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred umax(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred umax(A, B).
EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
// We analyze this as umax(A, B) swapped-pred A.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
(A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // umin(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // umin(A, B) pred A.
EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
// We analyze this as umax(-A, -B) swapped-pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred umin(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred umin(A, B).
EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
// We analyze this as umax(-A, -B) pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
@@ -3016,7 +2934,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -3030,7 +2948,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
@@ -3087,11 +3005,254 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
}
+ return nullptr;
+}
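The min/max folds factored out above all reduce to ordering facts such as "smax(A, B) sge A". A brute-force illustration (illustrative only, not part of the patch):

    #include <algorithm>
    #include <cassert>

    int main() {
      for (int A = -3; A <= 3; ++A)
        for (int B = -3; B <= 3; ++B) {
          assert(std::max(A, B) >= A); // smax(A, B) sge A -> true
          assert(std::min(A, B) <= A); // smin(A, B) sle A -> true
        }
      return 0;
    }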
+
+/// Given operands for an ICmpInst, see if we can fold the result.
+/// If not, this returns null.
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ Type *ITy = GetCompareTy(LHS); // The return type.
+
+ // icmp X, X -> true/false
+ // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
+ // because X could be 0.
+ if (LHS == RHS || isa<UndefValue>(RHS))
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ if (Value *V = simplifyICmpOfBools(Pred, LHS, RHS, Q))
+ return V;
+
+ if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q))
+ return V;
+
+ if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS))
+ return V;
+
+ // If both operands have range metadata, use the metadata
+ // to simplify the comparison.
+ if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
+ auto RHS_Instr = dyn_cast<Instruction>(RHS);
+ auto LHS_Instr = dyn_cast<Instruction>(LHS);
+
+ if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
+ LHS_Instr->getMetadata(LLVMContext::MD_range)) {
+ auto RHS_CR = getConstantRangeFromMetadata(
+ *RHS_Instr->getMetadata(LLVMContext::MD_range));
+ auto LHS_CR = getConstantRangeFromMetadata(
+ *LHS_Instr->getMetadata(LLVMContext::MD_range));
+
+ auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR);
+ if (Satisfied_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+
+ auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion(
+ CmpInst::getInversePredicate(Pred), RHS_CR);
+ if (InversedSatisfied_CR.contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ // Compare of cast, for example (zext X) != 0 -> X != 0
+ if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+ Instruction *LI = cast<CastInst>(LHS);
+ Value *SrcOp = LI->getOperand(0);
+ Type *SrcTy = SrcOp->getType();
+ Type *DstTy = LI->getType();
+
+ // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+ // if the integer type is the same size as the pointer type.
+ if (MaxRecurse && isa<PtrToIntInst>(LI) &&
+ Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // Transfer the cast to the constant.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ ConstantExpr::getIntToPtr(RHSC, SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+ if (RI->getOperand(0)->getType() == SrcTy)
+ // Compare without the cast.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (isa<ZExtInst>(LHS)) {
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+ // same type.
+ if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that signed predicates become unsigned.
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, RI->getOperand(0), Q,
+ MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two zero-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+ // there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ // LHS <u RHS.
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS
+ // is non-negative then LHS <s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+ }
+ }
+ }
+ }
+
+ if (isa<SExtInst>(LHS)) {
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that the predicate does not change.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two sign-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are all equal, while RHS has varying
+ // bits there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // If RHS is non-negative then LHS <s RHS. If RHS is negative then
+ // LHS >s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+
+ // If LHS is non-negative then LHS <u RHS. If LHS is negative then
+ // LHS >u RHS.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // Comparison is true iff the LHS <s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // Comparison is true iff the LHS >=s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // icmp eq|ne X, Y -> false|true if X != Y
+ if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+ isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
+ LLVMContext &Ctx = LHS->getType()->getContext();
+ return Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
+ }
+
+ if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available..
if (LHS->getType()->isPointerTy())
if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, LHS, RHS))
return C;
+ if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS))
+ if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS))
+ if (Q.DL.getTypeSizeInBits(CLHS->getPointerOperandType()) ==
+ Q.DL.getTypeSizeInBits(CLHS->getType()) &&
+ Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) ==
+ Q.DL.getTypeSizeInBits(CRHS->getType()))
+ if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI,
+ CLHS->getPointerOperand(),
+ CRHS->getPointerOperand()))
+ return C;
if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
@@ -3119,17 +3280,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If a bit is known to be zero for A and known to be one for B,
// then A and B cannot be equal.
if (ICmpInst::isEquality(Pred)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- uint32_t BitWidth = CI->getBitWidth();
+ const APInt *RHSVal;
+ if (match(RHS, m_APInt(RHSVal))) {
+ unsigned BitWidth = RHSVal->getBitWidth();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC,
Q.CxtI, Q.DT);
- const APInt &RHSVal = CI->getValue();
- if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0))
- return Pred == ICmpInst::ICMP_EQ
- ? ConstantInt::getFalse(CI->getContext())
- : ConstantInt::getTrue(CI->getContext());
+ if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0))
+ return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy)
+ : ConstantInt::getTrue(ITy);
}
}
@@ -3175,17 +3335,18 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Fold trivial predicates.
+ Type *RetTy = GetCompareTy(LHS);
if (Pred == FCmpInst::FCMP_FALSE)
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
if (Pred == FCmpInst::FCMP_TRUE)
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
// UNO/ORD predicates can be trivially folded if NaNs are ignored.
if (FMF.noNaNs()) {
if (Pred == FCmpInst::FCMP_UNO)
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
if (Pred == FCmpInst::FCMP_ORD)
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
}
// fcmp pred x, undef and fcmp pred undef, x
@@ -3193,15 +3354,15 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
// Choosing NaN for the undef will always make unordered comparison succeed
// and ordered comparison fail.
- return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred));
+ return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred));
}
// fcmp x,x -> true/false. Not all compares are foldable.
if (LHS == RHS) {
if (CmpInst::isTrueWhenEqual(Pred))
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
if (CmpInst::isFalseWhenEqual(Pred))
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
}
// Handle fcmp with constant RHS
@@ -3216,11 +3377,11 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the constant is a nan, see if we can fold the comparison based on it.
if (CFP->getValueAPF().isNaN()) {
if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
- return ConstantInt::getFalse(CFP->getContext());
+ return getFalse(RetTy);
assert(FCmpInst::isUnordered(Pred) &&
"Comparison must be either ordered or unordered!");
// True if unordered.
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
}
// Check whether the constant is an infinity.
if (CFP->getValueAPF().isInfinity()) {
@@ -3228,10 +3389,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
switch (Pred) {
case FCmpInst::FCMP_OLT:
// No value is ordered and less than negative infinity.
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
case FCmpInst::FCMP_UGE:
// All values are unordered with or at least negative infinity.
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
default:
break;
}
@@ -3239,10 +3400,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
switch (Pred) {
case FCmpInst::FCMP_OGT:
// No value is ordered and greater than infinity.
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
case FCmpInst::FCMP_ULE:
// All values are unordered with and at most infinity.
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
default:
break;
}
@@ -3252,12 +3413,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
switch (Pred) {
case FCmpInst::FCMP_UGE:
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
break;
case FCmpInst::FCMP_OLT:
// X < 0
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
break;
default:
break;
@@ -3371,6 +3532,150 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return nullptr;
}
+/// Try to simplify a select instruction whose condition is a bit test, i.e.
+/// an integer equality comparison of a masked value against zero:
+/// (X & Y) ==/!= 0.
+static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X,
+ const APInt *Y, bool TrueWhenUnset) {
+ const APInt *C;
+
+ // (X & Y) == 0 ? X & ~Y : X --> X
+ // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
+ if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+
+ // (X & Y) == 0 ? X : X & ~Y --> X & ~Y
+ // (X & Y) != 0 ? X : X & ~Y --> X
+ if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+
+ if (Y->isPowerOf2()) {
+ // (X & Y) == 0 ? X | Y : X --> X | Y
+ // (X & Y) != 0 ? X | Y : X --> X
+ if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+
+ // (X & Y) == 0 ? X : X | Y --> X
+ // (X & Y) != 0 ? X : X | Y --> X | Y
+ if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+ }
+
+ return nullptr;
+}
+
+/// An alternative way to test if a bit is set or not uses sgt/slt instead of
+/// eq/ne.
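+/// For example (an illustrative sketch, not part of the original comment):
+/// 'icmp slt i8 %x, 0' is true exactly when the sign bit 0x80 of %x is set,
+/// so it behaves like 'icmp ne (and i8 %x, 0x80), 0', and 'icmp sgt i8 %x,
+/// -1' behaves like 'icmp eq (and i8 %x, 0x80), 0'.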
+static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal,
+ Value *FalseVal,
+ bool TrueWhenUnset) {
+ unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
+ if (!BitWidth)
+ return nullptr;
+
+ APInt MinSignedValue;
+ Value *X;
+ if (match(CmpLHS, m_Trunc(m_Value(X))) && (X == TrueVal || X == FalseVal)) {
+ // icmp slt (trunc X), 0 <--> icmp ne (and X, C), 0
+ // icmp sgt (trunc X), -1 <--> icmp eq (and X, C), 0
+ unsigned DestSize = CmpLHS->getType()->getScalarSizeInBits();
+ MinSignedValue = APInt::getSignedMinValue(DestSize).zext(BitWidth);
+ } else {
+ // icmp slt X, 0 <--> icmp ne (and X, C), 0
+ // icmp sgt X, -1 <--> icmp eq (and X, C), 0
+ X = CmpLHS;
+ MinSignedValue = APInt::getSignedMinValue(BitWidth);
+ }
+
+ if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, &MinSignedValue,
+ TrueWhenUnset))
+ return V;
+
+ return nullptr;
+}
+
+/// Try to simplify a select instruction when its condition operand is an
+/// integer comparison.
+static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
+ Value *FalseVal, const Query &Q,
+ unsigned MaxRecurse) {
+ ICmpInst::Predicate Pred;
+ Value *CmpLHS, *CmpRHS;
+ if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS))))
+ return nullptr;
+
+ // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
+ // decomposeBitTestICmp() might help.
+ if (ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero())) {
+ Value *X;
+ const APInt *Y;
+ if (match(CmpLHS, m_And(m_Value(X), m_APInt(Y))))
+ if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
+ Pred == ICmpInst::ICMP_EQ))
+ return V;
+ } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
+ // Comparing signed-less-than 0 checks if the sign bit is set.
+ if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal,
+ false))
+ return V;
+ } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
+ // Comparing signed-greater-than -1 checks if the sign bit is not set.
+ if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal,
+ true))
+ return V;
+ }
+
+ if (CondVal->hasOneUse()) {
+ const APInt *C;
+ if (match(CmpRHS, m_APInt(C))) {
+ // X < MIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue())
+ return FalseVal;
+ // X < MIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_ULT && C->isMinValue())
+ return FalseVal;
+ // X > MAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue())
+ return FalseVal;
+ // X > MAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue())
+ return FalseVal;
+ }
+ }
+
+ // If we have an equality comparison, then we know the value in one of the
+ // arms of the select. See if substituting this value into the arm and
+ // simplifying the result yields the same value as the other arm.
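+  // Illustrative example (not from the original source): for
+  //   select (icmp eq i32 %x, 0), i32 0, i32 %x
+  // substituting 0 for %x in the false arm simplifies it to 0, which matches
+  // the true arm, so the whole select simplifies to the false arm %x.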
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return FalseVal;
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return FalseVal;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return TrueVal;
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return TrueVal;
+ }
+
+ return nullptr;
+}
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
@@ -3399,106 +3704,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
return TrueVal;
- if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) {
- // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
- // decomposeBitTestICmp() might help.
- unsigned BitWidth =
- Q.DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
- ICmpInst::Predicate Pred = ICI->getPredicate();
- Value *CmpLHS = ICI->getOperand(0);
- Value *CmpRHS = ICI->getOperand(1);
- APInt MinSignedValue = APInt::getSignBit(BitWidth);
- Value *X;
- const APInt *Y;
- bool TrueWhenUnset;
- bool IsBitTest = false;
- if (ICmpInst::isEquality(Pred) &&
- match(CmpLHS, m_And(m_Value(X), m_APInt(Y))) &&
- match(CmpRHS, m_Zero())) {
- IsBitTest = true;
- TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
- } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
- X = CmpLHS;
- Y = &MinSignedValue;
- IsBitTest = true;
- TrueWhenUnset = false;
- } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
- X = CmpLHS;
- Y = &MinSignedValue;
- IsBitTest = true;
- TrueWhenUnset = true;
- }
- if (IsBitTest) {
- const APInt *C;
- // (X & Y) == 0 ? X & ~Y : X --> X
- // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
- if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
- *Y == ~*C)
- return TrueWhenUnset ? FalseVal : TrueVal;
- // (X & Y) == 0 ? X : X & ~Y --> X & ~Y
- // (X & Y) != 0 ? X : X & ~Y --> X
- if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
- *Y == ~*C)
- return TrueWhenUnset ? FalseVal : TrueVal;
-
- if (Y->isPowerOf2()) {
- // (X & Y) == 0 ? X | Y : X --> X | Y
- // (X & Y) != 0 ? X | Y : X --> X
- if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
- *Y == *C)
- return TrueWhenUnset ? TrueVal : FalseVal;
- // (X & Y) == 0 ? X : X | Y --> X
- // (X & Y) != 0 ? X : X | Y --> X | Y
- if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
- *Y == *C)
- return TrueWhenUnset ? TrueVal : FalseVal;
- }
- }
- if (ICI->hasOneUse()) {
- const APInt *C;
- if (match(CmpRHS, m_APInt(C))) {
- // X < MIN ? T : F --> F
- if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue())
- return FalseVal;
- // X < MIN ? T : F --> F
- if (Pred == ICmpInst::ICMP_ULT && C->isMinValue())
- return FalseVal;
- // X > MAX ? T : F --> F
- if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue())
- return FalseVal;
- // X > MAX ? T : F --> F
- if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue())
- return FalseVal;
- }
- }
-
- // If we have an equality comparison then we know the value in one of the
- // arms of the select. See if substituting this value into the arm and
- // simplifying the result yields the same value as the other arm.
- if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- TrueVal)
- return FalseVal;
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- FalseVal)
- return FalseVal;
- } else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- FalseVal)
- return TrueVal;
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- TrueVal)
- return TrueVal;
- }
- }
+ if (Value *V =
+ simplifySelectWithICmpCond(CondVal, TrueVal, FalseVal, Q, MaxRecurse))
+ return V;
return nullptr;
}
@@ -3587,6 +3795,32 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
}
}
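+  // The folds below only fire for i8-style GEPs (element size 1) where all
+  // interior indices are zero, so the last index is a raw byte offset.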
+ if (Q.DL.getTypeAllocSize(LastType) == 1 &&
+ all_of(Ops.slice(1).drop_back(1),
+ [](Value *Idx) { return match(Idx, m_Zero()); })) {
+ unsigned PtrWidth =
+ Q.DL.getPointerSizeInBits(Ops[0]->getType()->getPointerAddressSpace());
+ if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == PtrWidth) {
+ APInt BasePtrOffset(PtrWidth, 0);
+ Value *StrippedBasePtr =
+ Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL,
+ BasePtrOffset);
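+      // As an illustrative sketch: if Ops[0] is 'gep inbounds i8, i8* %p,
+      // i64 4', StrippedBasePtr is %p and BasePtrOffset accumulates to 4,
+      // feeding the 'gep (gep V, C), ...' folds below.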
+
+ // gep (gep V, C), (sub 0, V) -> C
+ if (match(Ops.back(),
+ m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr))))) {
+ auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset);
+ return ConstantExpr::getIntToPtr(CI, GEPTy);
+ }
+ // gep (gep V, C), (xor V, -1) -> C-1
+ if (match(Ops.back(),
+ m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes()))) {
+ auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1);
+ return ConstantExpr::getIntToPtr(CI, GEPTy);
+ }
+ }
+ }
+
// Check to see if this is constant foldable.
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (!isa<Constant>(Ops[i]))
@@ -3742,19 +3976,47 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
return CommonValue;
}
-static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
- if (Constant *C = dyn_cast<Constant>(Op))
- return ConstantFoldCastOperand(Instruction::Trunc, C, Ty, Q.DL);
+static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
+ Type *Ty, const Query &Q, unsigned MaxRecurse) {
+ if (auto *C = dyn_cast<Constant>(Op))
+ return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL);
+
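+  // Fold a pair of casts that together act as a no-op. An illustrative
+  // example (not in the original comments): 'trunc (zext i32 %x to i64) to
+  // i32' is eliminable as a bitcast between identical types, i.e. just %x.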
+ if (auto *CI = dyn_cast<CastInst>(Op)) {
+ auto *Src = CI->getOperand(0);
+ Type *SrcTy = Src->getType();
+ Type *MidTy = CI->getType();
+ Type *DstTy = Ty;
+ if (Src->getType() == Ty) {
+ auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+ auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(DstTy) : nullptr;
+ if (CastInst::isEliminableCastPair(FirstOp, SecondOp, SrcTy, MidTy, DstTy,
+ SrcIntPtrTy, MidIntPtrTy,
+ DstIntPtrTy) == Instruction::BitCast)
+ return Src;
+ }
+ }
+
+ // bitcast x -> x
+ if (CastOpc == Instruction::BitCast)
+ if (Op->getType() == Ty)
+ return Op;
return nullptr;
}
-Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyTruncInst(Op, Ty, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
//=== Helper functions for higher up the class hierarchy.
@@ -3837,6 +4099,8 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
case Instruction::FMul:
return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FDiv:
+ return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse);
default:
return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
}
@@ -4223,21 +4487,23 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
TLI, DT, AC, I);
break;
}
- case Instruction::Trunc:
- Result =
- SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I);
+#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
+#include "llvm/IR/Instruction.def"
+#undef HANDLE_CAST_INST
+ Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(),
+ DL, TLI, DT, AC, I);
break;
}
// In general, it is possible for computeKnownBits to determine all bits in a
// value even when the operands are not all constants.
- if (!Result && I->getType()->isIntegerTy()) {
+ if (!Result && I->getType()->isIntOrIntVectorTy()) {
unsigned BitWidth = I->getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT);
if ((KnownZero | KnownOne).isAllOnesValue())
- Result = ConstantInt::get(I->getContext(), KnownOne);
+ Result = ConstantInt::get(I->getType(), KnownOne);
}
/// If called on unreachable code, the above logic may report that the
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
index 3ab6b5d60905..d1374acd963e 100644
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-/// \brief Compute iterated dominance frontiers using a linear time algorithm.
+// Compute iterated dominance frontiers using a linear time algorithm.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt
index 08af5f37700d..15c757b48f76 100644
--- a/lib/Analysis/LLVMBuild.txt
+++ b/lib/Analysis/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = Analysis
parent = Libraries
-required_libraries = Core Support ProfileData
+required_libraries = Core Support ProfileData Object
diff --git a/lib/Analysis/LazyBlockFrequencyInfo.cpp b/lib/Analysis/LazyBlockFrequencyInfo.cpp
index 7debfde87d2a..596b6fc1afb5 100644
--- a/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;
@@ -24,7 +24,7 @@ using namespace llvm;
INITIALIZE_PASS_BEGIN(LazyBlockFrequencyInfoPass, DEBUG_TYPE,
"Lazy Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBPIPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LazyBlockFrequencyInfoPass, DEBUG_TYPE,
"Lazy Block Frequency Analysis", true, true)
@@ -40,7 +40,7 @@ void LazyBlockFrequencyInfoPass::print(raw_ostream &OS, const Module *) const {
}
void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU);
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -48,21 +48,20 @@ void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
void LazyBlockFrequencyInfoPass::releaseMemory() { LBFI.releaseMemory(); }
bool LazyBlockFrequencyInfoPass::runOnFunction(Function &F) {
- BranchProbabilityInfo &BPI =
- getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ auto &BPIPass = getAnalysis<LazyBranchProbabilityInfoPass>();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LBFI.setAnalysis(&F, &BPI, &LI);
+ LBFI.setAnalysis(&F, &BPIPass, &LI);
return false;
}
void LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AnalysisUsage &AU) {
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU);
AU.addRequired<LazyBlockFrequencyInfoPass>();
AU.addRequired<LoopInfoWrapperPass>();
}
void llvm::initializeLazyBFIPassPass(PassRegistry &Registry) {
- INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass);
+ initializeLazyBPIPassPass(Registry);
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
}
diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..b51c6beb7959
--- /dev/null
+++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -0,0 +1,63 @@
+//===- LazyBranchProbabilityInfo.cpp - Lazy Branch Probability Analysis ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an alternative analysis pass to BranchProbabilityInfoWrapperPass.
+// The difference is that with this pass the branch probabilities are not
+// computed when the analysis pass is executed but rather when the BPI result
+// is explicitly requested by the analysis client.
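+//
+// A client is expected to declare the dependency via
+// LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage() and to query the
+// result only on paths that actually need branch probabilities; the
+// LazyBlockFrequencyInfo changes in this commit give a usage sketch.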
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lazy-branch-prob"
+
+INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
+ "Lazy Branch Probability Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
+ "Lazy Branch Probability Analysis", true, true)
+
+char LazyBranchProbabilityInfoPass::ID = 0;
+
+LazyBranchProbabilityInfoPass::LazyBranchProbabilityInfoPass()
+ : FunctionPass(ID) {
+ initializeLazyBranchProbabilityInfoPassPass(*PassRegistry::getPassRegistry());
+}
+
+void LazyBranchProbabilityInfoPass::print(raw_ostream &OS,
+ const Module *) const {
+ LBPI->getCalculated().print(OS);
+}
+
+void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); }
+
+bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) {
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI);
+ return false;
+}
+
+void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) {
+ AU.addRequired<LazyBranchProbabilityInfoPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+}
+
+void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) {
+ INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass);
+ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
+}
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index acff8529b151..f7cf8c6729f2 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -8,7 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -23,39 +26,11 @@ using namespace llvm;
static void addEdge(SmallVectorImpl<LazyCallGraph::Edge> &Edges,
DenseMap<Function *, int> &EdgeIndexMap, Function &F,
LazyCallGraph::Edge::Kind EK) {
- // Note that we consider *any* function with a definition to be a viable
- // edge. Even if the function's definition is subject to replacement by
- // some other module (say, a weak definition) there may still be
- // optimizations which essentially speculate based on the definition and
- // a way to check that the specific definition is in fact the one being
- // used. For example, this could be done by moving the weak definition to
- // a strong (internal) definition and making the weak definition be an
- // alias. Then a test of the address of the weak function against the new
- // strong definition's address would be an effective way to determine the
- // safety of optimizing a direct call edge.
- if (!F.isDeclaration() &&
- EdgeIndexMap.insert({&F, Edges.size()}).second) {
- DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n");
- Edges.emplace_back(LazyCallGraph::Edge(F, EK));
- }
-}
-
-static void findReferences(SmallVectorImpl<Constant *> &Worklist,
- SmallPtrSetImpl<Constant *> &Visited,
- SmallVectorImpl<LazyCallGraph::Edge> &Edges,
- DenseMap<Function *, int> &EdgeIndexMap) {
- while (!Worklist.empty()) {
- Constant *C = Worklist.pop_back_val();
-
- if (Function *F = dyn_cast<Function>(C)) {
- addEdge(Edges, EdgeIndexMap, *F, LazyCallGraph::Edge::Ref);
- continue;
- }
+ if (!EdgeIndexMap.insert({&F, Edges.size()}).second)
+ return;
- for (Value *Op : C->operand_values())
- if (Visited.insert(cast<Constant>(Op)).second)
- Worklist.push_back(cast<Constant>(Op));
- }
+ DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n");
+ Edges.emplace_back(LazyCallGraph::Edge(F, EK));
}
LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
@@ -72,14 +47,26 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// are trivially added, but to accumulate the latter we walk the instructions
// and add every operand which is a constant to the worklist to process
// afterward.
+ //
+ // Note that we consider *any* function with a definition to be a viable
+ // edge. Even if the function's definition is subject to replacement by
+ // some other module (say, a weak definition) there may still be
+ // optimizations which essentially speculate based on the definition and
+ // a way to check that the specific definition is in fact the one being
+ // used. For example, this could be done by moving the weak definition to
+ // a strong (internal) definition and making the weak definition be an
+ // alias. Then a test of the address of the weak function against the new
+ // strong definition's address would be an effective way to determine the
+ // safety of optimizing a direct call edge.
for (BasicBlock &BB : F)
for (Instruction &I : BB) {
if (auto CS = CallSite(&I))
if (Function *Callee = CS.getCalledFunction())
- if (Callees.insert(Callee).second) {
- Visited.insert(Callee);
- addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call);
- }
+ if (!Callee->isDeclaration())
+ if (Callees.insert(Callee).second) {
+ Visited.insert(Callee);
+ addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call);
+ }
for (Value *Op : I.operand_values())
if (Constant *C = dyn_cast<Constant>(Op))
@@ -90,7 +77,9 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// We've collected all the constant (and thus potentially function or
// function containing) operands to all of the instructions in the function.
// Process them (recursively) collecting every function found.
- findReferences(Worklist, Visited, Edges, EdgeIndexMap);
+ visitReferences(Worklist, Visited, [&](Function &F) {
+ addEdge(Edges, EdgeIndexMap, F, LazyCallGraph::Edge::Ref);
+ });
}
void LazyCallGraph::Node::insertEdgeInternal(Function &Target, Edge::Kind EK) {
@@ -144,7 +133,9 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
"entry set.\n");
- findReferences(Worklist, Visited, EntryEdges, EntryIndexMap);
+ visitReferences(Worklist, Visited, [&](Function &F) {
+ addEdge(EntryEdges, EntryIndexMap, F, LazyCallGraph::Edge::Ref);
+ });
for (const Edge &E : EntryEdges)
RefSCCEntryNodes.push_back(&E.getFunction());
@@ -199,6 +190,57 @@ void LazyCallGraph::SCC::verify() {
}
#endif
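+
+// isParentOf checks for a direct call edge from this SCC into C, whereas
+// isAncestorOf (below) walks call edges transitively through the SCC DAG.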
+bool LazyCallGraph::SCC::isParentOf(const SCC &C) const {
+ if (this == &C)
+ return false;
+
+ for (Node &N : *this)
+ for (Edge &E : N.calls())
+ if (Node *CalleeN = E.getNode())
+ if (OuterRefSCC->G->lookupSCC(*CalleeN) == &C)
+ return true;
+
+ // No edges found.
+ return false;
+}
+
+bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const {
+ if (this == &TargetC)
+ return false;
+
+ LazyCallGraph &G = *OuterRefSCC->G;
+
+ // Start with this SCC.
+ SmallPtrSet<const SCC *, 16> Visited = {this};
+ SmallVector<const SCC *, 16> Worklist = {this};
+
+ // Walk down the graph until we run out of edges or find a path to TargetC.
+ do {
+ const SCC &C = *Worklist.pop_back_val();
+ for (Node &N : C)
+ for (Edge &E : N.calls()) {
+ Node *CalleeN = E.getNode();
+ if (!CalleeN)
+ continue;
+ SCC *CalleeC = G.lookupSCC(*CalleeN);
+ if (!CalleeC)
+ continue;
+
+ // If the callee's SCC is the TargetC, we're done.
+ if (CalleeC == &TargetC)
+ return true;
+
+ // If this is the first time we've reached this SCC, put it on the
+ // worklist to recurse through.
+ if (Visited.insert(CalleeC).second)
+ Worklist.push_back(CalleeC);
+ }
+ } while (!Worklist.empty());
+
+ // No paths found.
+ return false;
+}
+
LazyCallGraph::RefSCC::RefSCC(LazyCallGraph &G) : G(&G) {}
void LazyCallGraph::RefSCC::dump() const {
@@ -211,11 +253,17 @@ void LazyCallGraph::RefSCC::verify() {
assert(!SCCs.empty() && "Can't have an empty SCC!");
// Verify basic properties of the SCCs.
+ SmallPtrSet<SCC *, 4> SCCSet;
for (SCC *C : SCCs) {
assert(C && "Can't have a null SCC!");
C->verify();
assert(&C->getOuterRefSCC() == this &&
"SCC doesn't think it is inside this RefSCC!");
+ bool Inserted = SCCSet.insert(C).second;
+ assert(Inserted && "Found a duplicate SCC!");
+ auto IndexIt = SCCIndices.find(C);
+ assert(IndexIt != SCCIndices.end() &&
+ "Found an SCC that doesn't have an index!");
}
// Check that our indices map correctly.
@@ -223,6 +271,7 @@ void LazyCallGraph::RefSCC::verify() {
SCC *C = SCCIndexPair.first;
int i = SCCIndexPair.second;
assert(C && "Can't have a null SCC in the indices!");
+ assert(SCCSet.count(C) && "Found an index for an SCC not in the RefSCC!");
assert(SCCs[i] == C && "Index doesn't point to SCC!");
}
@@ -243,6 +292,20 @@ void LazyCallGraph::RefSCC::verify() {
"Edge to a RefSCC missing us in its parent set.");
}
}
+
+ // Check that our parents are actually parents.
+ for (RefSCC *ParentRC : Parents) {
+ assert(ParentRC != this && "Cannot be our own parent!");
+ auto HasConnectingEdge = [&] {
+ for (SCC &C : *ParentRC)
+ for (Node &N : C)
+ for (Edge &E : N)
+ if (G->lookupRefSCC(*E.getNode()) == this)
+ return true;
+ return false;
+ };
+ assert(HasConnectingEdge() && "No edge connects the parent to us!");
+ }
}
#endif
@@ -261,12 +324,153 @@ bool LazyCallGraph::RefSCC::isDescendantOf(const RefSCC &C) const {
return false;
}
+/// Generic helper that updates a postorder sequence of SCCs for a potentially
+/// cycle-introducing edge insertion.
+///
+/// A postorder sequence of SCCs of a directed graph has one fundamental
+/// property: all edges in the DAG of SCCs point "up" the sequence. That is,
+/// all edges in the SCC DAG point to prior SCCs in the sequence.
+///
+/// This routine both updates a postorder sequence and uses that sequence to
+/// compute the set of SCCs connected into a cycle. It should only be called to
+/// insert a "downward" edge which will require changing the sequence to
+/// restore it to a postorder.
+///
+/// When inserting an edge from an earlier SCC to a later SCC in some postorder
+/// sequence, all of the SCCs which may be impacted are in the closed range of
+/// those two within the postorder sequence. The algorithm used here to restore
+/// the state is as follows:
+///
+/// 1) Starting from the source SCC, construct a set of SCCs which reach the
+/// source SCC consisting of just the source SCC. Then scan toward the
+/// target SCC in postorder and for each SCC, if it has an edge to an SCC
+///    in the set, add it to the set. Otherwise (the source SCC is not
+///    a successor), move the scanned SCC in the postorder sequence to
+///    immediately before the source SCC, shifting the source SCC and all
+///    SCCs in the set one position toward the target SCC. Stop scanning
+///    after processing the target SCC.
+/// 2) If the source SCC is now past the target SCC in the postorder sequence,
+/// and thus the new edge will flow toward the start, we are done.
+/// 3) Otherwise, starting from the target SCC, walk all edges which reach an
+/// SCC between the source and the target, and add them to the set of
+/// connected SCCs, then recurse through them. Once a complete set of the
+/// SCCs the target connects to is known, hoist the remaining SCCs between
+/// the source and the target to be above the target. Note that there is no
+/// need to process the source SCC, it is already known to connect.
+/// 4) At this point, all of the SCCs in the closed range between the source
+/// SCC and the target SCC in the postorder sequence are connected,
+/// including the target SCC and the source SCC. Inserting the edge from
+/// the source SCC to the target SCC will form a cycle out of precisely
+/// these SCCs. Thus we can merge all of the SCCs in this closed range into
+/// a single SCC.
+///
+/// This process has various important properties:
+/// - Only mutates the SCCs when adding the edge actually changes the SCC
+/// structure.
+/// - Never mutates SCCs which are unaffected by the change.
+/// - Updates the postorder sequence to correctly satisfy the postorder
+/// constraint after the edge is inserted.
+/// - Only reorders SCCs in the closed postorder sequence from the source to
+/// the target, so easy to bound how much has changed even in the ordering.
+/// - Big-O is the number of edges in the closed postorder range of SCCs from
+/// source to target.
+///
+/// This helper routine, in addition to updating the postorder sequence
+/// itself, will also update a map from SCCs to indices within that sequence.
+///
+/// The sequence and the map must operate on pointers to the SCC type.
+///
+/// Two callbacks must be provided. The first computes the subset of SCCs in
+/// the postorder closed range from the source to the target which connect to
+/// the source SCC via some (transitive) set of edges. The second computes the
+/// subset of the same range which the target SCC connects to via some
+/// (transitive) set of edges. Both callbacks should populate the set argument
+/// provided.
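+///
+/// A small worked example (illustrative, not part of the original comment):
+/// take the postorder sequence A B C D and insert an edge from A to D. The
+/// scan in step 1 keeps between A and D only those of B and C which reach
+/// A through some edge, moving the rest in front of A. If D itself reaches
+/// A, the closed range left between A and D forms a cycle and is merged
+/// into a single SCC in step 4.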
+template <typename SCCT, typename PostorderSequenceT, typename SCCIndexMapT,
+ typename ComputeSourceConnectedSetCallableT,
+ typename ComputeTargetConnectedSetCallableT>
+static iterator_range<typename PostorderSequenceT::iterator>
+updatePostorderSequenceForEdgeInsertion(
+ SCCT &SourceSCC, SCCT &TargetSCC, PostorderSequenceT &SCCs,
+ SCCIndexMapT &SCCIndices,
+ ComputeSourceConnectedSetCallableT ComputeSourceConnectedSet,
+ ComputeTargetConnectedSetCallableT ComputeTargetConnectedSet) {
+ int SourceIdx = SCCIndices[&SourceSCC];
+ int TargetIdx = SCCIndices[&TargetSCC];
+ assert(SourceIdx < TargetIdx && "Cannot have equal indices here!");
+
+ SmallPtrSet<SCCT *, 4> ConnectedSet;
+
+ // Compute the SCCs which (transitively) reach the source.
+ ComputeSourceConnectedSet(ConnectedSet);
+
+  // Partition the SCCs in this part of the post-order sequence so only SCCs
+ // connecting to the source remain between it and the target. This is
+ // a benign partition as it preserves postorder.
+ auto SourceI = std::stable_partition(
+ SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1,
+ [&ConnectedSet](SCCT *C) { return !ConnectedSet.count(C); });
+ for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i)
+ SCCIndices.find(SCCs[i])->second = i;
+
+ // If the target doesn't connect to the source, then we've corrected the
+ // post-order and there are no cycles formed.
+ if (!ConnectedSet.count(&TargetSCC)) {
+ assert(SourceI > (SCCs.begin() + SourceIdx) &&
+ "Must have moved the source to fix the post-order.");
+ assert(*std::prev(SourceI) == &TargetSCC &&
+           "Last SCC to move should have been the target.");
+
+ // Return an empty range at the target SCC indicating there is nothing to
+ // merge.
+ return make_range(std::prev(SourceI), std::prev(SourceI));
+ }
+
+ assert(SCCs[TargetIdx] == &TargetSCC &&
+ "Should not have moved target if connected!");
+ SourceIdx = SourceI - SCCs.begin();
+ assert(SCCs[SourceIdx] == &SourceSCC &&
+ "Bad updated index computation for the source SCC!");
+
+ // See whether there are any remaining intervening SCCs between the source
+  // and target. If so, we need to make sure they all are reachable from the
+ // target.
+ if (SourceIdx + 1 < TargetIdx) {
+ ConnectedSet.clear();
+ ComputeTargetConnectedSet(ConnectedSet);
+
+ // Partition SCCs so that only SCCs reached from the target remain between
+ // the source and the target. This preserves postorder.
+ auto TargetI = std::stable_partition(
+ SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1,
+ [&ConnectedSet](SCCT *C) { return ConnectedSet.count(C); });
+ for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i)
+ SCCIndices.find(SCCs[i])->second = i;
+ TargetIdx = std::prev(TargetI) - SCCs.begin();
+ assert(SCCs[TargetIdx] == &TargetSCC &&
+ "Should always end with the target!");
+ }
+
+ // At this point, we know that connecting source to target forms a cycle
+ // because target connects back to source, and we know that all of the SCCs
+ // between the source and target in the postorder sequence participate in that
+ // cycle.
+ return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
+}
+
SmallVector<LazyCallGraph::SCC *, 1>
LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!");
-
SmallVector<SCC *, 1> DeletedSCCs;
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
SCC &SourceSCC = *G->lookupSCC(SourceN);
SCC &TargetSCC = *G->lookupSCC(TargetN);
@@ -274,10 +478,6 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// we've just added more connectivity.
if (&SourceSCC == &TargetSCC) {
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
-#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
-#endif
return DeletedSCCs;
}
@@ -291,114 +491,44 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
int TargetIdx = SCCIndices[&TargetSCC];
if (TargetIdx < SourceIdx) {
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
-#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
-#endif
return DeletedSCCs;
}
- // When we do have an edge from an earlier SCC to a later SCC in the
- // postorder sequence, all of the SCCs which may be impacted are in the
- // closed range of those two within the postorder sequence. The algorithm to
- // restore the state is as follows:
- //
- // 1) Starting from the source SCC, construct a set of SCCs which reach the
- // source SCC consisting of just the source SCC. Then scan toward the
- // target SCC in postorder and for each SCC, if it has an edge to an SCC
- // in the set, add it to the set. Otherwise, the source SCC is not
- // a successor, move it in the postorder sequence to immediately before
- // the source SCC, shifting the source SCC and all SCCs in the set one
- // position toward the target SCC. Stop scanning after processing the
- // target SCC.
- // 2) If the source SCC is now past the target SCC in the postorder sequence,
- // and thus the new edge will flow toward the start, we are done.
- // 3) Otherwise, starting from the target SCC, walk all edges which reach an
- // SCC between the source and the target, and add them to the set of
- // connected SCCs, then recurse through them. Once a complete set of the
- // SCCs the target connects to is known, hoist the remaining SCCs between
- // the source and the target to be above the target. Note that there is no
- // need to process the source SCC, it is already known to connect.
- // 4) At this point, all of the SCCs in the closed range between the source
- // SCC and the target SCC in the postorder sequence are connected,
- // including the target SCC and the source SCC. Inserting the edge from
- // the source SCC to the target SCC will form a cycle out of precisely
- // these SCCs. Thus we can merge all of the SCCs in this closed range into
- // a single SCC.
- //
- // This process has various important properties:
- // - Only mutates the SCCs when adding the edge actually changes the SCC
- // structure.
- // - Never mutates SCCs which are unaffected by the change.
- // - Updates the postorder sequence to correctly satisfy the postorder
- // constraint after the edge is inserted.
- // - Only reorders SCCs in the closed postorder sequence from the source to
- // the target, so easy to bound how much has changed even in the ordering.
- // - Big-O is the number of edges in the closed postorder range of SCCs from
- // source to target.
-
- assert(SourceIdx < TargetIdx && "Cannot have equal indices here!");
- SmallPtrSet<SCC *, 4> ConnectedSet;
-
// Compute the SCCs which (transitively) reach the source.
- ConnectedSet.insert(&SourceSCC);
- auto IsConnected = [&](SCC &C) {
- for (Node &N : C)
- for (Edge &E : N.calls()) {
- assert(E.getNode() && "Must have formed a node within an SCC!");
- if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
- return true;
- }
-
- return false;
- };
-
- for (SCC *C :
- make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1))
- if (IsConnected(*C))
- ConnectedSet.insert(C);
-
- // Partition the SCCs in this part of the port-order sequence so only SCCs
- // connecting to the source remain between it and the target. This is
- // a benign partition as it preserves postorder.
- auto SourceI = std::stable_partition(
- SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1,
- [&ConnectedSet](SCC *C) { return !ConnectedSet.count(C); });
- for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i)
- SCCIndices.find(SCCs[i])->second = i;
-
- // If the target doesn't connect to the source, then we've corrected the
- // post-order and there are no cycles formed.
- if (!ConnectedSet.count(&TargetSCC)) {
- assert(SourceI > (SCCs.begin() + SourceIdx) &&
- "Must have moved the source to fix the post-order.");
- assert(*std::prev(SourceI) == &TargetSCC &&
- "Last SCC to move should have bene the target.");
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) {
#ifndef NDEBUG
+ // Check that the RefSCC is still valid before computing this as the
+    // results will be nonsensical if we've broken its invariants.
verify();
#endif
- return DeletedSCCs;
- }
+ ConnectedSet.insert(&SourceSCC);
+ auto IsConnected = [&](SCC &C) {
+ for (Node &N : C)
+ for (Edge &E : N.calls()) {
+ assert(E.getNode() && "Must have formed a node within an SCC!");
+ if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
+ return true;
+ }
- assert(SCCs[TargetIdx] == &TargetSCC &&
- "Should not have moved target if connected!");
- SourceIdx = SourceI - SCCs.begin();
+ return false;
+ };
+
+ for (SCC *C :
+ make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1))
+ if (IsConnected(*C))
+ ConnectedSet.insert(C);
+ };
+ // Use a normal worklist to find which SCCs the target connects to. We still
+ // bound the search based on the range in the postorder list we care about,
+ // but because this is forward connectivity we just "recurse" through the
+ // edges.
+ auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) {
#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
+ // Check that the RefSCC is still valid before computing this as the
+    // results will be nonsensical if we've broken its invariants.
+ verify();
#endif
-
- // See whether there are any remaining intervening SCCs between the source
- // and target. If so we need to make sure they all are reachable form the
- // target.
- if (SourceIdx + 1 < TargetIdx) {
- // Use a normal worklist to find which SCCs the target connects to. We still
- // bound the search based on the range in the postorder list we care about,
- // but because this is forward connectivity we just "recurse" through the
- // edges.
- ConnectedSet.clear();
ConnectedSet.insert(&TargetSCC);
SmallVector<SCC *, 4> Worklist;
Worklist.push_back(&TargetSCC);
@@ -421,35 +551,36 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
Worklist.push_back(&EdgeC);
}
} while (!Worklist.empty());
+ };
- // Partition SCCs so that only SCCs reached from the target remain between
- // the source and the target. This preserves postorder.
- auto TargetI = std::stable_partition(
- SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1,
- [&ConnectedSet](SCC *C) { return ConnectedSet.count(C); });
- for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i)
- SCCIndices.find(SCCs[i])->second = i;
- TargetIdx = std::prev(TargetI) - SCCs.begin();
- assert(SCCs[TargetIdx] == &TargetSCC &&
- "Should always end with the target!");
+ // Use a generic helper to update the postorder sequence of SCCs and return
+ // a range of any SCCs connected into a cycle by inserting this edge. This
+ // routine will also take care of updating the indices into the postorder
+ // sequence.
+ auto MergeRange = updatePostorderSequenceForEdgeInsertion(
+ SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet,
+ ComputeTargetConnectedSet);
+
+ // If the merge range is empty, then adding the edge didn't actually form any
+ // new cycles. We're done.
+ if (MergeRange.begin() == MergeRange.end()) {
+ // Now that the SCC structure is finalized, flip the kind to call.
+ SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ return DeletedSCCs;
+ }
#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
+ // Before merging, check that the RefSCC remains valid after all the
+ // postorder updates.
+ verify();
#endif
- }
- // At this point, we know that connecting source to target forms a cycle
- // because target connects back to source, and we know that all of the SCCs
- // between the source and target in the postorder sequence participate in that
- // cycle. This means that we need to merge all of these SCCs into a single
+ // Otherwise we need to merge all of the SCCs in the cycle into a single
// result SCC.
//
// NB: We merge into the target because all of these functions were already
// reachable from the target, meaning any SCC-wide properties deduced about it
// other than the set of functions within it will not have changed.
- auto MergeRange =
- make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
for (SCC *C : MergeRange) {
assert(C != &TargetSCC &&
"We merge *into* the target and shouldn't process it here!");
@@ -471,37 +602,55 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
-#ifndef NDEBUG
- // And we're done! Verify in debug builds that the RefSCC is coherent.
- verify();
-#endif
+ // And we're done!
return DeletedSCCs;
}
-void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
- Node &TargetN) {
+void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN,
+ Node &TargetN) {
assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
- SCC &SourceSCC = *G->lookupSCC(SourceN);
- SCC &TargetSCC = *G->lookupSCC(TargetN);
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
- assert(&SourceSCC.getOuterRefSCC() == this &&
+ assert(G->lookupRefSCC(SourceN) == this &&
"Source must be in this RefSCC.");
- assert(&TargetSCC.getOuterRefSCC() == this &&
+ assert(G->lookupRefSCC(TargetN) == this &&
"Target must be in this RefSCC.");
+ assert(G->lookupSCC(SourceN) != G->lookupSCC(TargetN) &&
+ "Source and Target must be in separate SCCs for this to be trivial!");
// Set the edge kind.
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
+}
+
+iterator_range<LazyCallGraph::RefSCC::iterator>
+LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
+ assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
- // If this call edge is just connecting two separate SCCs within this RefSCC,
- // there is nothing to do.
- if (&SourceSCC != &TargetSCC) {
#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
#endif
- return;
- }
+
+ assert(G->lookupRefSCC(SourceN) == this &&
+ "Source must be in this RefSCC.");
+ assert(G->lookupRefSCC(TargetN) == this &&
+ "Target must be in this RefSCC.");
+
+ SCC &TargetSCC = *G->lookupSCC(TargetN);
+ assert(G->lookupSCC(SourceN) == &TargetSCC && "Source and Target must be in "
+ "the same SCC to require the "
+ "full CG update.");
+
+ // Set the edge kind.
+ SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
// Otherwise we are removing a call edge from a single SCC. This may break
// the cycle. In order to compute the new set of SCCs, we need to do a small
@@ -635,10 +784,9 @@ void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
// root DFS number.
auto SCCNodes = make_range(
PendingSCCStack.rbegin(),
- std::find_if(PendingSCCStack.rbegin(), PendingSCCStack.rend(),
- [RootDFSNumber](Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ find_if(reverse(PendingSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Form a new SCC out of these nodes and then clear them off our pending
// stack.
@@ -663,10 +811,8 @@ void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
for (int Idx = OldIdx, Size = SCCs.size(); Idx < Size; ++Idx)
SCCIndices[SCCs[Idx]] = Idx;
-#ifndef NDEBUG
- // We're done. Check the validity on our way out.
- verify();
-#endif
+ return make_range(SCCs.begin() + OldIdx,
+ SCCs.begin() + OldIdx + NewSCCs.size());
}
void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN,
@@ -746,112 +892,113 @@ void LazyCallGraph::RefSCC::insertOutgoingEdge(Node &SourceN, Node &TargetN,
SmallVector<LazyCallGraph::RefSCC *, 1>
LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
- assert(G->lookupRefSCC(TargetN) == this && "Target must be in this SCC.");
-
- // We store the RefSCCs found to be connected in postorder so that we can use
- // that when merging. We also return this to the caller to allow them to
- // invalidate information pertaining to these RefSCCs.
- SmallVector<RefSCC *, 1> Connected;
-
+ assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC.");
RefSCC &SourceC = *G->lookupRefSCC(SourceN);
- assert(&SourceC != this && "Source must not be in this SCC.");
+ assert(&SourceC != this && "Source must not be in this RefSCC.");
assert(SourceC.isDescendantOf(*this) &&
"Source must be a descendant of the Target.");
- // The algorithm we use for merging SCCs based on the cycle introduced here
- // is to walk the RefSCC inverted DAG formed by the parent sets. The inverse
- // graph has the same cycle properties as the actual DAG of the RefSCCs, and
- // when forming RefSCCs lazily by a DFS, the bottom of the graph won't exist
- // in many cases which should prune the search space.
- //
- // FIXME: We can get this pruning behavior even after the incremental RefSCC
- // formation by leaving behind (conservative) DFS numberings in the nodes,
- // and pruning the search with them. These would need to be cleverly updated
- // during the removal of intra-SCC edges, but could be preserved
- // conservatively.
- //
- // FIXME: This operation currently creates ordering stability problems
- // because we don't use stably ordered containers for the parent SCCs.
-
- // The set of RefSCCs that are connected to the parent, and thus will
- // participate in the merged connected component.
- SmallPtrSet<RefSCC *, 8> ConnectedSet;
- ConnectedSet.insert(this);
-
- // We build up a DFS stack of the parents chains.
- SmallVector<std::pair<RefSCC *, parent_iterator>, 8> DFSStack;
- SmallPtrSet<RefSCC *, 8> Visited;
- int ConnectedDepth = -1;
- DFSStack.push_back({&SourceC, SourceC.parent_begin()});
- do {
- auto DFSPair = DFSStack.pop_back_val();
- RefSCC *C = DFSPair.first;
- parent_iterator I = DFSPair.second;
- auto E = C->parent_end();
+ SmallVector<RefSCC *, 1> DeletedRefSCCs;
- while (I != E) {
- RefSCC &Parent = *I++;
-
- // If we have already processed this parent SCC, skip it, and remember
- // whether it was connected so we don't have to check the rest of the
- // stack. This also handles when we reach a child of the 'this' SCC (the
- // callee) which terminates the search.
- if (ConnectedSet.count(&Parent)) {
- assert(ConnectedDepth < (int)DFSStack.size() &&
- "Cannot have a connected depth greater than the DFS depth!");
- ConnectedDepth = DFSStack.size();
- continue;
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
+ int SourceIdx = G->RefSCCIndices[&SourceC];
+ int TargetIdx = G->RefSCCIndices[this];
+ assert(SourceIdx < TargetIdx &&
+ "Postorder list doesn't see edge as incoming!");
+
+ // Compute the RefSCCs which (transitively) reach the source. We do this by
+ // working backwards from the source using the parent set in each RefSCC,
+ // skipping any RefSCCs that don't fall in the postorder range. This has the
+  // advantage of walking the sparser parent edges (in high fan-out graphs)
+  // but, more importantly, it avoids examining all forward edges in all
+  // RefSCCs within the postorder range which aren't in fact connected. Only
+  // connected RefSCCs (and their edges) are visited here.
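+  // In effect: when many RefSCCs call into one shared utility RefSCC, the
+  // backward walk visits only the few RefSCCs that can actually reach the
+  // source instead of scanning every edge of every RefSCC in the range.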
+ auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) {
+ Set.insert(&SourceC);
+ SmallVector<RefSCC *, 4> Worklist;
+ Worklist.push_back(&SourceC);
+ do {
+ RefSCC &RC = *Worklist.pop_back_val();
+ for (RefSCC &ParentRC : RC.parents()) {
+ // Skip any RefSCCs outside the range of source to target in the
+ // postorder sequence.
+ int ParentIdx = G->getRefSCCIndex(ParentRC);
+        assert(ParentIdx > SourceIdx &&
+               "Parent cannot precede source in postorder!");
+ if (ParentIdx > TargetIdx)
+ continue;
+ if (Set.insert(&ParentRC).second)
+ // First edge connecting to this parent, add it to our worklist.
+ Worklist.push_back(&ParentRC);
}
- if (Visited.count(&Parent))
- continue;
+ } while (!Worklist.empty());
+ };
- // We fully explore the depth-first space, adding nodes to the connected
- // set only as we pop them off, so "recurse" by rotating to the parent.
- DFSStack.push_back({C, I});
- C = &Parent;
- I = C->parent_begin();
- E = C->parent_end();
- }
+  // Use a normal worklist to find which RefSCCs the target connects to. We
+  // still bound the search based on the range in the postorder list we care
+  // about, but because this is forward connectivity we just "recurse" through
+  // the edges.
+ auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) {
+ Set.insert(this);
+ SmallVector<RefSCC *, 4> Worklist;
+ Worklist.push_back(this);
+ do {
+ RefSCC &RC = *Worklist.pop_back_val();
+ for (SCC &C : RC)
+ for (Node &N : C)
+ for (Edge &E : N) {
+ assert(E.getNode() && "Must have formed a node!");
+ RefSCC &EdgeRC = *G->lookupRefSCC(*E.getNode());
+ if (G->getRefSCCIndex(EdgeRC) <= SourceIdx)
+ // Not in the postorder sequence between source and target.
+ continue;
+
+ if (Set.insert(&EdgeRC).second)
+ Worklist.push_back(&EdgeRC);
+ }
+ } while (!Worklist.empty());
+ };
- // If we've found a connection anywhere below this point on the stack (and
- // thus up the parent graph from the caller), the current node needs to be
- // added to the connected set now that we've processed all of its parents.
- if ((int)DFSStack.size() == ConnectedDepth) {
- --ConnectedDepth; // We're finished with this connection.
- bool Inserted = ConnectedSet.insert(C).second;
- (void)Inserted;
- assert(Inserted && "Cannot insert a refSCC multiple times!");
- Connected.push_back(C);
- } else {
- // Otherwise remember that its parents don't ever connect.
- assert(ConnectedDepth < (int)DFSStack.size() &&
- "Cannot have a connected depth greater than the DFS depth!");
- Visited.insert(C);
- }
- } while (!DFSStack.empty());
+ // Use a generic helper to update the postorder sequence of RefSCCs and return
+ // a range of any RefSCCs connected into a cycle by inserting this edge. This
+ // routine will also take care of updating the indices into the postorder
+ // sequence.
+ iterator_range<SmallVectorImpl<RefSCC *>::iterator> MergeRange =
+ updatePostorderSequenceForEdgeInsertion(
+ SourceC, *this, G->PostOrderRefSCCs, G->RefSCCIndices,
+ ComputeSourceConnectedSet, ComputeTargetConnectedSet);
+
+ // Build a set so we can do fast tests for whether a RefSCC will end up as
+ // part of the merged RefSCC.
+ SmallPtrSet<RefSCC *, 16> MergeSet(MergeRange.begin(), MergeRange.end());
+
+ // This RefSCC will always be part of that set, so just insert it here.
+ MergeSet.insert(this);
// Now that we have identified all of the SCCs which need to be merged into
// a connected set with the inserted edge, merge all of them into this SCC.
- // We walk the newly connected RefSCCs in the reverse postorder of the parent
- // DAG walk above and merge in each of their SCC postorder lists. This
- // ensures a merged postorder SCC list.
SmallVector<SCC *, 16> MergedSCCs;
int SCCIndex = 0;
- for (RefSCC *C : reverse(Connected)) {
- assert(C != this &&
- "This RefSCC should terminate the DFS without being reached.");
+ for (RefSCC *RC : MergeRange) {
+ assert(RC != this && "We're merging into the target RefSCC, so it "
+ "shouldn't be in the range.");
    // Merge the parents which aren't part of the merge into our parents.
- for (RefSCC *ParentC : C->Parents)
- if (!ConnectedSet.count(ParentC))
- Parents.insert(ParentC);
- C->Parents.clear();
+ for (RefSCC *ParentRC : RC->Parents)
+ if (!MergeSet.count(ParentRC))
+ Parents.insert(ParentRC);
+ RC->Parents.clear();
// Walk the inner SCCs to update their up-pointer and walk all the edges to
// update any parent sets.
// FIXME: We should try to find a way to avoid this (rather expensive) edge
// walk by updating the parent sets in some other manner.
- for (SCC &InnerC : *C) {
+ for (SCC &InnerC : *RC) {
InnerC.OuterRefSCC = this;
SCCIndices[&InnerC] = SCCIndex++;
for (Node &N : InnerC) {
@@ -860,9 +1007,9 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
assert(E.getNode() &&
"Cannot have a null node within a visited SCC!");
RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode());
- if (ConnectedSet.count(&ChildRC))
+ if (MergeSet.count(&ChildRC))
continue;
- ChildRC.Parents.erase(C);
+ ChildRC.Parents.erase(RC);
ChildRC.Parents.insert(this);
}
}
@@ -871,33 +1018,37 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
// Now merge in the SCCs. We can actually move here so try to reuse storage
// the first time through.
if (MergedSCCs.empty())
- MergedSCCs = std::move(C->SCCs);
+ MergedSCCs = std::move(RC->SCCs);
else
- MergedSCCs.append(C->SCCs.begin(), C->SCCs.end());
- C->SCCs.clear();
+ MergedSCCs.append(RC->SCCs.begin(), RC->SCCs.end());
+ RC->SCCs.clear();
+ DeletedRefSCCs.push_back(RC);
}
- // Finally append our original SCCs to the merged list and move it into
- // place.
+ // Append our original SCCs to the merged list and move it into place.
for (SCC &InnerC : *this)
SCCIndices[&InnerC] = SCCIndex++;
MergedSCCs.append(SCCs.begin(), SCCs.end());
SCCs = std::move(MergedSCCs);
+ // Remove the merged away RefSCCs from the post order sequence.
+ for (RefSCC *RC : MergeRange)
+ G->RefSCCIndices.erase(RC);
+ int IndexOffset = MergeRange.end() - MergeRange.begin();
+ auto EraseEnd =
+ G->PostOrderRefSCCs.erase(MergeRange.begin(), MergeRange.end());
+ for (RefSCC *RC : make_range(EraseEnd, G->PostOrderRefSCCs.end()))
+ G->RefSCCIndices[RC] -= IndexOffset;
+
+  // At this point we have a merged RefSCC with a post-order list of SCCs;
+  // just connect the nodes to form the new edge.
SourceN.insertEdgeInternal(TargetN, Edge::Ref);
-#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
-#endif
-
// We return the list of SCCs which were merged so that callers can
// invalidate any data they have associated with those SCCs. Note that these
// SCCs are no longer in an interesting state (they are totally empty) but
// the pointers will remain stable for the life of the graph itself.
- return Connected;
+ return DeletedRefSCCs;
}
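Both connected-set computations above are bounded worklist searches: reachability is only followed through RefSCCs whose postorder index lies between the source and the target, which is what keeps the update cheap. A minimal standalone sketch of that bounding idea, using hypothetical GraphNode/Parents names rather than the LazyCallGraph API:

#include <cassert>
#include <unordered_set>
#include <vector>

struct GraphNode {
  int PostOrderIdx;
  std::vector<GraphNode *> Parents;
};

// Collect everything that reaches Source through parent edges while staying
// inside the (SourceIdx, TargetIdx] postorder window; nodes past the target
// cannot be part of the newly formed cycle, so they prune the search.
std::unordered_set<GraphNode *>
reachableWithin(GraphNode &Source, int SourceIdx, int TargetIdx) {
  std::unordered_set<GraphNode *> Set{&Source};
  std::vector<GraphNode *> Worklist{&Source};
  while (!Worklist.empty()) {
    GraphNode *N = Worklist.back();
    Worklist.pop_back();
    for (GraphNode *P : N->Parents) {
      assert(P->PostOrderIdx > SourceIdx && "Parents follow the source!");
      if (P->PostOrderIdx > TargetIdx)
        continue;
      if (Set.insert(P).second)
        Worklist.push_back(P);
    }
  }
  return Set;
}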
void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
@@ -907,10 +1058,16 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
assert(&TargetRC != this && "The target must not be a member of this RefSCC");
- assert(std::find(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this) ==
- G->LeafRefSCCs.end() &&
+ assert(!is_contained(G->LeafRefSCCs, this) &&
"Cannot have a leaf RefSCC source.");
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
// First remove it from the node.
SourceN.removeEdgeInternal(TargetN.getFunction());
@@ -962,6 +1119,13 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
assert(!SourceN[TargetN].isCall() &&
"Cannot remove a call edge, it must first be made a ref edge");
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
// First remove the actual edge.
SourceN.removeEdgeInternal(TargetN.getFunction());
@@ -972,6 +1136,13 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
if (&SourceN == &TargetN)
return Result;
+ // If this ref edge is within an SCC then there are sufficient other edges to
+ // form a cycle without this edge so removing it is a no-op.
+ SCC &SourceC = *G->lookupSCC(SourceN);
+ SCC &TargetC = *G->lookupSCC(TargetN);
+ if (&SourceC == &TargetC)
+ return Result;
+
// We build somewhat synthetic new RefSCCs by providing a postorder mapping
// for each inner SCC. We also store these associated with *nodes* rather
// than SCCs because this saves a round-trip through the node->SCC map and in
@@ -994,7 +1165,6 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
// and handle participants in that cycle without walking all the edges that
// form the connections, and instead by relying on the fundamental guarantee
// coming into this operation.
- SCC &TargetC = *G->lookupSCC(TargetN);
for (Node &N : TargetC)
PostOrderMapping[&N] = RootPostOrderNumber;
@@ -1082,9 +1252,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
}
// If this child isn't currently in this RefSCC, no need to process
- // it.
- // However, we do need to remove this RefSCC from its RefSCC's parent
- // set.
+ // it. However, we do need to remove this RefSCC from its RefSCC's
+ // parent set.
RefSCC &ChildRC = *G->lookupRefSCC(ChildN);
ChildRC.Parents.erase(this);
++I;
@@ -1121,10 +1290,9 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
// root DFS number.
auto RefSCCNodes = make_range(
PendingRefSCCStack.rbegin(),
- std::find_if(PendingRefSCCStack.rbegin(), PendingRefSCCStack.rend(),
- [RootDFSNumber](Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Mark the postorder number for these nodes and clear them off the
// stack. We'll use the postorder number to pull them into RefSCCs at the
@@ -1149,6 +1317,25 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
for (int i = 1; i < PostOrderNumber; ++i)
Result.push_back(G->createRefSCC(*G));
+ // Insert the resulting postorder sequence into the global graph postorder
+  // sequence before the current RefSCC in that sequence. The idea is that
+ // this RefSCC is the target of the reference edge removed, and thus has
+ // a direct or indirect edge to every other RefSCC formed and so must be at
+ // the end of any postorder traversal.
+ //
+ // FIXME: It'd be nice to change the APIs so that we returned an iterator
+ // range over the global postorder sequence and generally use that sequence
+ // rather than building a separate result vector here.
+ if (!Result.empty()) {
+ int Idx = G->getRefSCCIndex(*this);
+ G->PostOrderRefSCCs.insert(G->PostOrderRefSCCs.begin() + Idx,
+ Result.begin(), Result.end());
+ for (int i : seq<int>(Idx, G->PostOrderRefSCCs.size()))
+ G->RefSCCIndices[G->PostOrderRefSCCs[i]] = i;
+ assert(G->PostOrderRefSCCs[G->getRefSCCIndex(*this)] == this &&
+ "Failed to update this RefSCC's index after insertion!");
+ }
+
for (SCC *C : SCCs) {
auto PostOrderI = PostOrderMapping.find(&*C->begin());
assert(PostOrderI != PostOrderMapping.end() &&
@@ -1166,7 +1353,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
RefSCC &RC = *Result[SCCNumber - 1];
int SCCIndex = RC.SCCs.size();
RC.SCCs.push_back(C);
- SCCIndices[C] = SCCIndex;
+ RC.SCCIndices[C] = SCCIndex;
C->OuterRefSCC = &RC;
}
@@ -1178,12 +1365,15 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
G->connectRefSCC(*RC);
// Now erase all but the root's SCCs.
- SCCs.erase(std::remove_if(SCCs.begin(), SCCs.end(),
- [&](SCC *C) {
- return PostOrderMapping.lookup(&*C->begin()) !=
- RootPostOrderNumber;
- }),
+ SCCs.erase(remove_if(SCCs,
+ [&](SCC *C) {
+ return PostOrderMapping.lookup(&*C->begin()) !=
+ RootPostOrderNumber;
+ }),
SCCs.end());
+ SCCIndices.clear();
+ for (int i = 0, Size = SCCs.size(); i < Size; ++i)
+ SCCIndices[SCCs[i]] = i;
#ifndef NDEBUG
// Now we need to reconnect the current (root) SCC to the graph. We do this
@@ -1207,11 +1397,24 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
if (!Result.empty())
assert(!IsLeaf && "This SCC cannot be a leaf as we have split out new "
"SCCs by removing this edge.");
- if (!std::any_of(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(),
- [&](RefSCC *C) { return C == this; }))
+ if (none_of(G->LeafRefSCCs, [&](RefSCC *C) { return C == this; }))
assert(!IsLeaf && "This SCC cannot be a leaf as it already had child "
"SCCs before we removed this edge.");
#endif
+ // And connect both this RefSCC and all the new ones to the correct parents.
+ // The easiest way to do this is just to re-analyze the old parent set.
+ SmallVector<RefSCC *, 4> OldParents(Parents.begin(), Parents.end());
+ Parents.clear();
+ for (RefSCC *ParentRC : OldParents)
+ for (SCC &ParentC : *ParentRC)
+ for (Node &ParentN : ParentC)
+ for (Edge &E : ParentN) {
+ assert(E.getNode() && "Cannot have a missing node in a visited SCC!");
+ RefSCC &RC = *G->lookupRefSCC(*E.getNode());
+ if (&RC != ParentRC)
+ RC.Parents.insert(ParentRC);
+ }
+
// If this SCC stopped being a leaf through this edge removal, remove it from
// the leaf SCC list. Note that this DTRT in the case where this was never
// a leaf.
@@ -1222,10 +1425,93 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
std::remove(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this),
G->LeafRefSCCs.end());
+#ifndef NDEBUG
+ // Verify all of the new RefSCCs.
+ for (RefSCC *RC : Result)
+ RC->verify();
+#endif
+
// Return the new list of SCCs.
return Result;
}
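The verify-on-entry/verify-on-exit blocks that these mutation routines now open with rely on llvm::make_scope_exit from llvm/ADT/ScopeExit.h, which runs its callback on every exit path. A minimal sketch of the idiom:

#include "llvm/ADT/ScopeExit.h"
#include <cassert>

void mutateStructure(bool &Valid) {
  assert(Valid && "Invariant must hold on entry!");
  // The callback fires when VerifyOnExit is destroyed, so the invariant is
  // re-checked on every return path, including early returns.
  auto VerifyOnExit = llvm::make_scope_exit([&] { assert(Valid); });
  if (!Valid)
    return; // still verified on the way out
  // ... mutations that must preserve Valid ...
}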
+void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN,
+ Node &TargetN) {
+  // The only trivial case that requires any graph updates is when we add a
+  // new ref edge that may connect different RefSCCs along that path. This is
+  // only because of the parents set. Every other part of the graph remains
+  // constant after this edge insertion.
+ assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
+ RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
+  if (&TargetRC == this)
+    return;
+
+ assert(TargetRC.isDescendantOf(*this) &&
+ "Target must be a descendant of the Source.");
+ // The only change required is to add this RefSCC to the parent set of the
+ // target. This is a set and so idempotent if the edge already existed.
+ TargetRC.Parents.insert(this);
+}
+
+void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN,
+ Node &TargetN) {
+#ifndef NDEBUG
+ // Check that the RefSCC is still valid when we finish.
+ auto ExitVerifier = make_scope_exit([this] { verify(); });
+
+ // Check that we aren't breaking some invariants of the SCC graph.
+ SCC &SourceC = *G->lookupSCC(SourceN);
+ SCC &TargetC = *G->lookupSCC(TargetN);
+ if (&SourceC != &TargetC)
+ assert(SourceC.isAncestorOf(TargetC) &&
+ "Call edge is not trivial in the SCC graph!");
+#endif
+ // First insert it into the source or find the existing edge.
+ auto InsertResult = SourceN.EdgeIndexMap.insert(
+ {&TargetN.getFunction(), SourceN.Edges.size()});
+ if (!InsertResult.second) {
+ // Already an edge, just update it.
+ Edge &E = SourceN.Edges[InsertResult.first->second];
+ if (E.isCall())
+ return; // Nothing to do!
+ E.setKind(Edge::Call);
+ } else {
+ // Create the new edge.
+ SourceN.Edges.emplace_back(TargetN, Edge::Call);
+ }
+
+ // Now that we have the edge, handle the graph fallout.
+ handleTrivialEdgeInsertion(SourceN, TargetN);
+}
+
+void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) {
+#ifndef NDEBUG
+ // Check that the RefSCC is still valid when we finish.
+ auto ExitVerifier = make_scope_exit([this] { verify(); });
+
+ // Check that we aren't breaking some invariants of the RefSCC graph.
+ RefSCC &SourceRC = *G->lookupRefSCC(SourceN);
+ RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
+ if (&SourceRC != &TargetRC)
+ assert(SourceRC.isAncestorOf(TargetRC) &&
+ "Ref edge is not trivial in the RefSCC graph!");
+#endif
+ // First insert it into the source or find the existing edge.
+ auto InsertResult = SourceN.EdgeIndexMap.insert(
+ {&TargetN.getFunction(), SourceN.Edges.size()});
+ if (!InsertResult.second)
+ // Already an edge, we're done.
+ return;
+
+ // Create the new edge.
+ SourceN.Edges.emplace_back(TargetN, Edge::Ref);
+
+ // Now that we have the edge, handle the graph fallout.
+ handleTrivialEdgeInsertion(SourceN, TargetN);
+}
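Both trivial-insertion routines above use the single-probe map insert idiom: insert() reports through the .second of its result whether the key was new, so one lookup decides between creating an edge and upgrading an existing one. A generic sketch with standard containers (the real code uses the node's EdgeIndexMap and edge vector):

#include <cstddef>
#include <map>
#include <vector>

struct SketchEdge {
  int Target;
  bool IsCall;
};

void addCallEdge(std::map<int, std::size_t> &Index,
                 std::vector<SketchEdge> &Edges, int Target) {
  // Try to claim the next slot; on failure, .first points at the old entry.
  auto InsertResult = Index.insert({Target, Edges.size()});
  if (!InsertResult.second) {
    Edges[InsertResult.first->second].IsCall = true; // upgrade in place
    return;
  }
  Edges.push_back({Target, /*IsCall=*/true}); // genuinely new edge
}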
+
void LazyCallGraph::insertEdge(Node &SourceN, Function &Target, Edge::Kind EK) {
assert(SCCMap.empty() && DFSStack.empty() &&
"This method cannot be called after SCCs have been formed!");
@@ -1240,6 +1526,93 @@ void LazyCallGraph::removeEdge(Node &SourceN, Function &Target) {
return SourceN.removeEdgeInternal(Target);
}
+void LazyCallGraph::removeDeadFunction(Function &F) {
+ // FIXME: This is unnecessarily restrictive. We should be able to remove
+ // functions which recursively call themselves.
+ assert(F.use_empty() &&
+ "This routine should only be called on trivially dead functions!");
+
+ auto EII = EntryIndexMap.find(&F);
+ if (EII != EntryIndexMap.end()) {
+ EntryEdges[EII->second] = Edge();
+ EntryIndexMap.erase(EII);
+ }
+
+ // It's safe to just remove un-visited functions from the RefSCC entry list.
+ // FIXME: This is a linear operation which could become hot and benefit from
+ // an index map.
+ auto RENI = find(RefSCCEntryNodes, &F);
+ if (RENI != RefSCCEntryNodes.end())
+ RefSCCEntryNodes.erase(RENI);
+
+ auto NI = NodeMap.find(&F);
+ if (NI == NodeMap.end())
+ // Not in the graph at all!
+ return;
+
+ Node &N = *NI->second;
+ NodeMap.erase(NI);
+
+ if (SCCMap.empty() && DFSStack.empty()) {
+ // No SCC walk has begun, so removing this is fine and there is nothing
+ // else necessary at this point but clearing out the node.
+ N.clear();
+ return;
+ }
+
+ // Check that we aren't going to break the DFS walk.
+ assert(all_of(DFSStack,
+ [&N](const std::pair<Node *, edge_iterator> &Element) {
+ return Element.first != &N;
+ }) &&
+ "Tried to remove a function currently in the DFS stack!");
+ assert(find(PendingRefSCCStack, &N) == PendingRefSCCStack.end() &&
+ "Tried to remove a function currently pending to add to a RefSCC!");
+
+ // Cannot remove a function which has yet to be visited in the DFS walk, so
+ // if we have a node at all then we must have an SCC and RefSCC.
+ auto CI = SCCMap.find(&N);
+ assert(CI != SCCMap.end() &&
+ "Tried to remove a node without an SCC after DFS walk started!");
+ SCC &C = *CI->second;
+ SCCMap.erase(CI);
+ RefSCC &RC = C.getOuterRefSCC();
+
+ // This node must be the only member of its SCC as it has no callers, and
+ // that SCC must be the only member of a RefSCC as it has no references.
+ // Validate these properties first.
+ assert(C.size() == 1 && "Dead functions must be in a singular SCC");
+ assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC");
+ assert(RC.Parents.empty() && "Cannot have parents of a dead RefSCC!");
+
+ // Now remove this RefSCC from any parents sets and the leaf list.
+ for (Edge &E : N)
+ if (Node *TargetN = E.getNode())
+ if (RefSCC *TargetRC = lookupRefSCC(*TargetN))
+ TargetRC->Parents.erase(&RC);
+ // FIXME: This is a linear operation which could become hot and benefit from
+ // an index map.
+ auto LRI = find(LeafRefSCCs, &RC);
+ if (LRI != LeafRefSCCs.end())
+ LeafRefSCCs.erase(LRI);
+
+ auto RCIndexI = RefSCCIndices.find(&RC);
+ int RCIndex = RCIndexI->second;
+ PostOrderRefSCCs.erase(PostOrderRefSCCs.begin() + RCIndex);
+ RefSCCIndices.erase(RCIndexI);
+ for (int i = RCIndex, Size = PostOrderRefSCCs.size(); i < Size; ++i)
+ RefSCCIndices[PostOrderRefSCCs[i]] = i;
+
+ // Finally clear out all the data structures from the node down through the
+ // components.
+ N.clear();
+ C.clear();
+ RC.clear();
+
+ // Nothing to delete as all the objects are allocated in stable bump pointer
+ // allocators.
+}
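The tail of removeDeadFunction maintains the invariant RefSCCIndices[PostOrderRefSCCs[i]] == i by erasing one slot and renumbering the suffix that shifted left. A standalone sketch of that maintenance step, with standard containers standing in for LLVM's:

#include <unordered_map>
#include <vector>

// Erase V[Idx] and restore Indices[V[i]] == i for the shifted suffix.
template <typename T>
void eraseAndRenumber(std::vector<T *> &V,
                      std::unordered_map<T *, int> &Indices, int Idx) {
  Indices.erase(V[Idx]);
  V.erase(V.begin() + Idx);
  for (int i = Idx, Size = (int)V.size(); i < Size; ++i)
    Indices[V[i]] = i;
}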
+
LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
return *new (MappedN = BPA.Allocate()) Node(*this, F);
}
@@ -1372,10 +1745,9 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
// root DFS number.
auto SCCNodes = make_range(
PendingSCCStack.rbegin(),
- std::find_if(PendingSCCStack.rbegin(), PendingSCCStack.rend(),
- [RootDFSNumber](Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ find_if(reverse(PendingSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Form a new SCC out of these nodes and then clear them off our pending
// stack.
RC.SCCs.push_back(createSCC(RC, SCCNodes));
@@ -1411,19 +1783,19 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) {
IsLeaf = false;
}
- // For the SCCs where we fine no child SCCs, add them to the leaf list.
+ // For the SCCs where we find no child SCCs, add them to the leaf list.
if (IsLeaf)
LeafRefSCCs.push_back(&RC);
}
-LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
+bool LazyCallGraph::buildNextRefSCCInPostOrder() {
if (DFSStack.empty()) {
Node *N;
do {
// If we've handled all candidate entry nodes to the SCC forest, we're
// done.
if (RefSCCEntryNodes.empty())
- return nullptr;
+ return false;
N = &get(*RefSCCEntryNodes.pop_back_val());
} while (N->DFSNumber != 0);
@@ -1494,9 +1866,9 @@ LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
// root DFS number.
auto RefSCCNodes = node_stack_range(
PendingRefSCCStack.rbegin(),
- std::find_if(
- PendingRefSCCStack.rbegin(), PendingRefSCCStack.rend(),
- [RootDFSNumber](Node *N) { return N->DFSNumber < RootDFSNumber; }));
+ find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Form a new RefSCC out of these nodes and then clear them off our pending
// stack.
RefSCC *NewRC = createRefSCC(*this);
@@ -1505,13 +1877,18 @@ LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
PendingRefSCCStack.erase(RefSCCNodes.end().base(),
PendingRefSCCStack.end());
- // We return the new node here. This essentially suspends the DFS walk
- // until another RefSCC is requested.
- return NewRC;
+ // Push the new node into the postorder list and return true indicating we
+ // successfully grew the postorder sequence by one.
+ bool Inserted =
+ RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second;
+ (void)Inserted;
+ assert(Inserted && "Cannot already have this RefSCC in the index map!");
+ PostOrderRefSCCs.push_back(NewRC);
+ return true;
}
}
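Several hunks in this file replace explicit std::find_if(rbegin, rend, ...) calls with LLVM's range helpers; a minimal example of the same idiom, assuming llvm/ADT/STLExtras.h:

#include "llvm/ADT/STLExtras.h"
#include <vector>

// reverse() adapts the container for backward iteration and find_if takes
// the whole range, replacing an explicit rbegin()/rend() iterator pair.
int *findLastNegative(std::vector<int> &V) {
  auto R = llvm::reverse(V);
  auto It = llvm::find_if(R, [](int X) { return X < 0; });
  return It == R.end() ? nullptr : &*It;
}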
-char LazyCallGraphAnalysis::PassID;
+AnalysisKey LazyCallGraphAnalysis::Key;
LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 4d09b7ca006b..4f6355236873 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
@@ -50,7 +51,7 @@ namespace llvm {
FunctionPass *createLazyValueInfoPass() { return new LazyValueInfoWrapperPass(); }
}
-char LazyValueAnalysis::PassID;
+AnalysisKey LazyValueAnalysis::Key;
//===----------------------------------------------------------------------===//
// LVILatticeVal
@@ -70,12 +71,14 @@ class LVILatticeVal {
/// "nothing known yet".
undefined,
- /// This Value has a specific constant value. (For integers, constantrange
- /// is used instead.)
+ /// This Value has a specific constant value. (For constant integers,
+ /// constantrange is used instead. Integer typed constantexprs can appear
+ /// as constant.)
constant,
- /// This Value is known to not have the specified value. (For integers,
- /// constantrange is used instead.)
+ /// This Value is known to not have the specified value. (For constant
+ /// integers, constantrange is used instead. As above, integer typed
+ /// constantexprs can appear here.)
notconstant,
/// The Value falls within this range. (Used only for integer typed values.)
@@ -139,37 +142,37 @@ public:
return Range;
}
- /// Return true if this is a change in status.
- bool markOverdefined() {
+private:
+ void markOverdefined() {
if (isOverdefined())
- return false;
+ return;
Tag = overdefined;
- return true;
}
- /// Return true if this is a change in status.
- bool markConstant(Constant *V) {
+ void markConstant(Constant *V) {
assert(V && "Marking constant with NULL");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return markConstantRange(ConstantRange(CI->getValue()));
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ markConstantRange(ConstantRange(CI->getValue()));
+ return;
+ }
if (isa<UndefValue>(V))
- return false;
+ return;
assert((!isConstant() || getConstant() == V) &&
"Marking constant with different value");
assert(isUndefined());
Tag = constant;
Val = V;
- return true;
}
- /// Return true if this is a change in status.
- bool markNotConstant(Constant *V) {
+ void markNotConstant(Constant *V) {
assert(V && "Marking constant with NULL");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ return;
+ }
if (isa<UndefValue>(V))
- return false;
+ return;
assert((!isConstant() || getConstant() != V) &&
"Marking constant !constant with same value");
@@ -178,100 +181,70 @@ public:
assert(isUndefined() || isConstant());
Tag = notconstant;
Val = V;
- return true;
}
- /// Return true if this is a change in status.
- bool markConstantRange(ConstantRange NewR) {
+ void markConstantRange(ConstantRange NewR) {
if (isConstantRange()) {
if (NewR.isEmptySet())
- return markOverdefined();
-
- bool changed = Range != NewR;
- Range = std::move(NewR);
- return changed;
+ markOverdefined();
+ else {
+ Range = std::move(NewR);
+ }
+ return;
}
assert(isUndefined());
if (NewR.isEmptySet())
- return markOverdefined();
-
- Tag = constantrange;
- Range = std::move(NewR);
- return true;
+ markOverdefined();
+ else {
+ Tag = constantrange;
+ Range = std::move(NewR);
+ }
}
+public:
+
    /// Merge the specified lattice value into this one, updating this
    /// one in place.
- bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
- if (RHS.isUndefined() || isOverdefined()) return false;
- if (RHS.isOverdefined()) return markOverdefined();
+ void mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
+ if (RHS.isUndefined() || isOverdefined())
+ return;
+ if (RHS.isOverdefined()) {
+ markOverdefined();
+ return;
+ }
if (isUndefined()) {
- Tag = RHS.Tag;
- Val = RHS.Val;
- Range = RHS.Range;
- return true;
+ *this = RHS;
+ return;
}
if (isConstant()) {
- if (RHS.isConstant()) {
- if (Val == RHS.Val)
- return false;
- return markOverdefined();
- }
-
- if (RHS.isNotConstant()) {
- if (Val == RHS.Val)
- return markOverdefined();
-
- // Unless we can prove that the two Constants are different, we must
- // move to overdefined.
- if (ConstantInt *Res =
- dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
- CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL)))
- if (Res->isOne())
- return markNotConstant(RHS.getNotConstant());
-
- return markOverdefined();
- }
-
- return markOverdefined();
+ if (RHS.isConstant() && Val == RHS.Val)
+ return;
+ markOverdefined();
+ return;
}
if (isNotConstant()) {
- if (RHS.isConstant()) {
- if (Val == RHS.Val)
- return markOverdefined();
-
- // Unless we can prove that the two Constants are different, we must
- // move to overdefined.
- if (ConstantInt *Res =
- dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
- CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL)))
- if (Res->isOne())
- return false;
-
- return markOverdefined();
- }
-
- if (RHS.isNotConstant()) {
- if (Val == RHS.Val)
- return false;
- return markOverdefined();
- }
-
- return markOverdefined();
+ if (RHS.isNotConstant() && Val == RHS.Val)
+ return;
+ markOverdefined();
+ return;
}
assert(isConstantRange() && "New LVILattice type?");
- if (!RHS.isConstantRange())
- return markOverdefined();
-
+ if (!RHS.isConstantRange()) {
+ // We can get here if we've encountered a constantexpr of integer type
+ // and merge it with a constantrange.
+ markOverdefined();
+ return;
+ }
ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
if (NewR.isFullSet())
- return markOverdefined();
- return markConstantRange(NewR);
+ markOverdefined();
+ else
+ markConstantRange(NewR);
}
};
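The join rules mergeIn now implements can be stated compactly: undefined joins to anything, identical facts are kept, and any conflict goes to overdefined. A toy sketch of the same semantics (ranges omitted; this is not the LVILatticeVal API):

struct ToyVal {
  enum Tag { Undefined, Constant, Overdefined } T = Undefined;
  int C = 0;

  void mergeIn(const ToyVal &RHS) {
    if (RHS.T == Undefined || T == Overdefined)
      return; // nothing to learn, or already at the top of the lattice
    if (RHS.T == Overdefined || (T == Constant && RHS.C != C)) {
      T = Overdefined; // conflicting facts jump straight to the top
      return;
    }
    if (T == Undefined)
      *this = RHS; // adopt the other side's fact wholesale
  }
};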
@@ -366,6 +339,9 @@ namespace {
/// A callback value handle updates the cache when values are erased.
class LazyValueInfoCache;
struct LVIValueHandle final : public CallbackVH {
+ // Needs to access getValPtr(), which is protected.
+ friend struct DenseMapInfo<LVIValueHandle>;
+
LazyValueInfoCache *Parent;
LVIValueHandle(Value *V, LazyValueInfoCache *P)
@@ -376,7 +352,7 @@ namespace {
deleted();
}
};
-}
+} // end anonymous namespace
namespace {
/// This is the cache kept by LazyValueInfo which
@@ -387,12 +363,15 @@ namespace {
/// entries, allowing us to do a lookup with a binary search.
/// Over-defined lattice values are recorded in OverDefinedCache to reduce
/// memory overhead.
- typedef SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4>
- ValueCacheEntryTy;
+ struct ValueCacheEntryTy {
+ ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {}
+ LVIValueHandle Handle;
+ SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> BlockVals;
+ };
/// This is all of the cached information for all values,
/// mapped from Value* to key information.
- std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+ DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache;
/// This tracks, on a per-block basis, the set of values that are
/// over-defined at the end of that block.
@@ -404,6 +383,183 @@ namespace {
/// don't spend time removing unused blocks from our caches.
DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+ public:
+ void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
+ SeenBlocks.insert(BB);
+
+ // Insert over-defined values into their own cache to reduce memory
+ // overhead.
+ if (Result.isOverdefined())
+ OverDefinedCache[BB].insert(Val);
+ else {
+ auto It = ValueCache.find_as(Val);
+ if (It == ValueCache.end()) {
+ ValueCache[Val] = make_unique<ValueCacheEntryTy>(Val, this);
+ It = ValueCache.find_as(Val);
+ assert(It != ValueCache.end() && "Val was just added to the map!");
+ }
+ It->second->BlockVals[BB] = Result;
+ }
+ }
+
+ bool isOverdefined(Value *V, BasicBlock *BB) const {
+ auto ODI = OverDefinedCache.find(BB);
+
+ if (ODI == OverDefinedCache.end())
+ return false;
+
+ return ODI->second.count(V);
+ }
+
+ bool hasCachedValueInfo(Value *V, BasicBlock *BB) const {
+ if (isOverdefined(V, BB))
+ return true;
+
+ auto I = ValueCache.find_as(V);
+ if (I == ValueCache.end())
+ return false;
+
+ return I->second->BlockVals.count(BB);
+ }
+
+ LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) const {
+ if (isOverdefined(V, BB))
+ return LVILatticeVal::getOverdefined();
+
+ auto I = ValueCache.find_as(V);
+ if (I == ValueCache.end())
+ return LVILatticeVal();
+ auto BBI = I->second->BlockVals.find(BB);
+ if (BBI == I->second->BlockVals.end())
+ return LVILatticeVal();
+ return BBI->second;
+ }
+
+ /// clear - Empty the cache.
+ void clear() {
+ SeenBlocks.clear();
+ ValueCache.clear();
+ OverDefinedCache.clear();
+ }
+
+ /// Inform the cache that a given value has been deleted.
+ void eraseValue(Value *V);
+
+ /// This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// Updates the cache to remove any influence an overdefined value in
+ /// OldSucc might have (unless also overdefined in NewSucc). This just
+ /// flushes elements from the cache and does not add any.
+  void threadEdgeImpl(BasicBlock *OldSucc, BasicBlock *NewSucc);
+
+ friend struct LVIValueHandle;
+ };
+}
+
+void LazyValueInfoCache::eraseValue(Value *V) {
+ SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
+ for (auto &I : OverDefinedCache) {
+ SmallPtrSetImpl<Value *> &ValueSet = I.second;
+ ValueSet.erase(V);
+ if (ValueSet.empty())
+ ToErase.push_back(I.first);
+ }
+ for (auto &BB : ToErase)
+ OverDefinedCache.erase(BB);
+
+ ValueCache.erase(V);
+}
+
+void LVIValueHandle::deleted() {
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->eraseValue(*this);
+}
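The eviction above is driven by LLVM's CallbackVH (llvm/IR/ValueHandle.h), whose deleted() hook fires when the underlying Value is destroyed. A minimal sketch of the pattern; MyCache here is a hypothetical owner, not the LazyValueInfoCache API:

#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include <set>

struct MyCache {
  std::set<llvm::Value *> Known;
  void eraseValue(llvm::Value *V) { Known.erase(V); }
};

struct CacheEvictingVH final : public llvm::CallbackVH {
  MyCache *Parent;
  CacheEvictingVH(llvm::Value *V, MyCache *P) : CallbackVH(V), Parent(P) {}

  // LLVM invokes this when the Value dies. As the comment above stresses,
  // an erasure that deallocates *this must be the method's last action.
  void deleted() override { Parent->eraseValue(getValPtr()); }
};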
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Shortcut if we have never seen this block.
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ if (I == SeenBlocks.end())
+ return;
+ SeenBlocks.erase(I);
+
+ auto ODI = OverDefinedCache.find(BB);
+ if (ODI != OverDefinedCache.end())
+ OverDefinedCache.erase(ODI);
+
+ for (auto &I : ValueCache)
+ I.second->BlockVals.erase(BB);
+}
+
+void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+  // When an edge in the graph has been threaded, values that we could not
+  // previously determine (i.e. values marked overdefined) may become
+  // solvable. We do NOT try to proactively update these values.
+ // Instead, we clear their entries from the cache, and allow lazy updating to
+ // recompute them when needed.
+
+ // The updating process is fairly simple: we need to drop cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ auto I = OverDefinedCache.find(OldSucc);
+ if (I == OverDefinedCache.end())
+ return; // Nothing to process here.
+ SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end());
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ // If a value was marked overdefined in OldSucc, and is here too...
+ auto OI = OverDefinedCache.find(ToUpdate);
+ if (OI == OverDefinedCache.end())
+ continue;
+ SmallPtrSetImpl<Value *> &ValueSet = OI->second;
+
+ bool changed = false;
+ for (Value *V : ValsToClear) {
+ if (!ValueSet.erase(V))
+ continue;
+
+ // If we removed anything, then we potentially need to update
+ // blocks successors too.
+ changed = true;
+
+ if (ValueSet.empty()) {
+ OverDefinedCache.erase(OI);
+ break;
+ }
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
+namespace {
+  // The actual implementation of the lazy analysis and update. The cached
+  // results from previous queries are held in the LazyValueInfoCache member
+  // below, a has-a relationship rather than inheritance.
+ class LazyValueInfoImpl {
+
+ /// Cached results from previous queries
+ LazyValueInfoCache TheCache;
+
/// This stack holds the state of the value solver during a query.
/// It basically emulates the callstack of the naive
/// recursive value lookup process.
@@ -428,19 +584,6 @@ namespace {
const DataLayout &DL; ///< A mandatory DataLayout
DominatorTree *DT; ///< An optional DT pointer.
- friend struct LVIValueHandle;
-
- void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
- SeenBlocks.insert(BB);
-
- // Insert over-defined values into their own cache to reduce memory
- // overhead.
- if (Result.isOverdefined())
- OverDefinedCache[BB].insert(Val);
- else
- lookup(Val)[BB] = Result;
- }
-
LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
LVILatticeVal &Result, Instruction *CxtI = nullptr);
@@ -450,6 +593,7 @@ namespace {
// returned means that the work item was not completely processed and must
// be revisited after going through the new items.
bool solveBlockValue(Value *Val, BasicBlock *BB);
+ bool solveBlockValueImpl(LVILatticeVal &Res, Value *Val, BasicBlock *BB);
bool solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *Val, BasicBlock *BB);
bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB);
bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S,
@@ -458,43 +602,12 @@ namespace {
BasicBlock *BB);
bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI,
BasicBlock *BB);
- void intersectAssumeBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV,
+ void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
+ LVILatticeVal &BBLV,
Instruction *BBI);
void solve();
- ValueCacheEntryTy &lookup(Value *V) {
- return ValueCache[LVIValueHandle(V, this)];
- }
-
- bool isOverdefined(Value *V, BasicBlock *BB) const {
- auto ODI = OverDefinedCache.find(BB);
-
- if (ODI == OverDefinedCache.end())
- return false;
-
- return ODI->second.count(V);
- }
-
- bool hasCachedValueInfo(Value *V, BasicBlock *BB) {
- if (isOverdefined(V, BB))
- return true;
-
- LVIValueHandle ValHandle(V, this);
- auto I = ValueCache.find(ValHandle);
- if (I == ValueCache.end())
- return false;
-
- return I->second.count(BB);
- }
-
- LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) {
- if (isOverdefined(V, BB))
- return LVILatticeVal::getOverdefined();
-
- return lookup(V)[BB];
- }
-
public:
/// This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
@@ -511,60 +624,28 @@ namespace {
LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
Instruction *CxtI = nullptr);
- /// This is the update interface to inform the cache that an edge from
- /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
- void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+  /// Completely flush all previously computed values.
+ void clear() {
+ TheCache.clear();
+ }
/// This is part of the update interface to inform the cache
/// that a block has been deleted.
- void eraseBlock(BasicBlock *BB);
-
- /// clear - Empty the cache.
- void clear() {
- SeenBlocks.clear();
- ValueCache.clear();
- OverDefinedCache.clear();
+ void eraseBlock(BasicBlock *BB) {
+ TheCache.eraseBlock(BB);
}
- LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL,
+ /// This is the update interface to inform the cache that an edge from
+ /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
+  void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+                  BasicBlock *NewSucc);
+
+ LazyValueInfoImpl(AssumptionCache *AC, const DataLayout &DL,
DominatorTree *DT = nullptr)
: AC(AC), DL(DL), DT(DT) {}
};
} // end anonymous namespace
-void LVIValueHandle::deleted() {
- SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
- for (auto &I : Parent->OverDefinedCache) {
- SmallPtrSetImpl<Value *> &ValueSet = I.second;
- if (ValueSet.count(getValPtr()))
- ValueSet.erase(getValPtr());
- if (ValueSet.empty())
- ToErase.push_back(I.first);
- }
- for (auto &BB : ToErase)
- Parent->OverDefinedCache.erase(BB);
-
- // This erasure deallocates *this, so it MUST happen after we're done
- // using any and all members of *this.
- Parent->ValueCache.erase(*this);
-}
-
-void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
- // Shortcut if we have never seen this block.
- DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
- if (I == SeenBlocks.end())
- return;
- SeenBlocks.erase(I);
-
- auto ODI = OverDefinedCache.find(BB);
- if (ODI != OverDefinedCache.end())
- OverDefinedCache.erase(ODI);
-
- for (auto &I : ValueCache)
- I.second.erase(BB);
-}
-
-void LazyValueInfoCache::solve() {
+void LazyValueInfoImpl::solve() {
while (!BlockValueStack.empty()) {
std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!");
@@ -572,11 +653,11 @@ void LazyValueInfoCache::solve() {
if (solveBlockValue(e.second, e.first)) {
// The work item was completely processed.
assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
- assert(hasCachedValueInfo(e.second, e.first) &&
+ assert(TheCache.hasCachedValueInfo(e.second, e.first) &&
"Result should be in cache!");
DEBUG(dbgs() << "POP " << *e.second << " in " << e.first->getName()
- << " = " << getCachedValueInfo(e.second, e.first) << "\n");
+ << " = " << TheCache.getCachedValueInfo(e.second, e.first) << "\n");
BlockValueStack.pop();
BlockValueSet.erase(e);
@@ -587,21 +668,20 @@ void LazyValueInfoCache::solve() {
}
}
-bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+bool LazyValueInfoImpl::hasBlockValue(Value *Val, BasicBlock *BB) {
// If already a constant, there is nothing to compute.
if (isa<Constant>(Val))
return true;
- return hasCachedValueInfo(Val, BB);
+ return TheCache.hasCachedValueInfo(Val, BB);
}
-LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+LVILatticeVal LazyValueInfoImpl::getBlockValue(Value *Val, BasicBlock *BB) {
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val))
return LVILatticeVal::get(VC);
- SeenBlocks.insert(BB);
- return getCachedValueInfo(Val, BB);
+ return TheCache.getCachedValueInfo(Val, BB);
}
static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
@@ -610,7 +690,7 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
case Instruction::Load:
case Instruction::Call:
case Instruction::Invoke:
- if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
+ if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
if (isa<IntegerType>(BBI->getType())) {
return LVILatticeVal::getRange(getConstantRangeFromMetadata(*Ranges));
}
@@ -620,14 +700,14 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
return LVILatticeVal::getOverdefined();
}
-bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) {
if (isa<Constant>(Val))
return true;
- if (hasCachedValueInfo(Val, BB)) {
+ if (TheCache.hasCachedValueInfo(Val, BB)) {
// If we have a cached value, use that.
DEBUG(dbgs() << " reuse BB '" << BB->getName()
- << "' val=" << getCachedValueInfo(Val, BB) << '\n');
+ << "' val=" << TheCache.getCachedValueInfo(Val, BB) << '\n');
// Since we're reusing a cached value, we don't need to update the
// OverDefinedCache. The cache will have been properly updated whenever the
@@ -638,28 +718,26 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
// Hold off inserting this value into the Cache in case we have to return
// false and come back later.
LVILatticeVal Res;
+ if (!solveBlockValueImpl(Res, Val, BB))
+ // Work pushed, will revisit
+ return false;
+
+ TheCache.insertResult(Val, BB, Res);
+ return true;
+}
+
+bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
+ Value *Val, BasicBlock *BB) {
Instruction *BBI = dyn_cast<Instruction>(Val);
- if (!BBI || BBI->getParent() != BB) {
- if (!solveBlockValueNonLocal(Res, Val, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (!BBI || BBI->getParent() != BB)
+ return solveBlockValueNonLocal(Res, Val, BB);
- if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- if (!solveBlockValuePHINode(Res, PN, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (PHINode *PN = dyn_cast<PHINode>(BBI))
+ return solveBlockValuePHINode(Res, PN, BB);
- if (auto *SI = dyn_cast<SelectInst>(BBI)) {
- if (!solveBlockValueSelect(Res, SI, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (auto *SI = dyn_cast<SelectInst>(BBI))
+ return solveBlockValueSelect(Res, SI, BB);
    // If this value is a nonnull pointer, record its range and bail out. Note
// that for all other pointer typed values, we terminate the search at the
@@ -673,29 +751,20 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
PointerType *PT = dyn_cast<PointerType>(BBI->getType());
if (PT && isKnownNonNull(BBI)) {
Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT));
- insertResult(Val, BB, Res);
return true;
}
if (BBI->getType()->isIntegerTy()) {
- if (isa<CastInst>(BBI)) {
- if (!solveBlockValueCast(Res, BBI, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (isa<CastInst>(BBI))
+ return solveBlockValueCast(Res, BBI, BB);
+
BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
- if (BO && isa<ConstantInt>(BO->getOperand(1))) {
- if (!solveBlockValueBinaryOp(Res, BBI, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (BO && isa<ConstantInt>(BO->getOperand(1)))
+ return solveBlockValueBinaryOp(Res, BBI, BB);
}
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - unknown inst def found.\n");
Res = getFromRangeMetadata(BBI);
- insertResult(Val, BB, Res);
return true;
}
@@ -748,7 +817,7 @@ static bool isObjectDereferencedInBlock(Value *Val, BasicBlock *BB) {
return false;
}
-bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
Value *Val, BasicBlock *BB) {
LVILatticeVal Result; // Start Undefined.
@@ -763,7 +832,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
PointerType *PTy = cast<PointerType>(Val->getType());
Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
} else {
- Result.markOverdefined();
+ Result = LVILatticeVal::getOverdefined();
}
BBLV = Result;
return true;
@@ -785,7 +854,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (Result.isOverdefined()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because of pred (non local).\n");
- // Bofore giving up, see if we can prove the pointer non-null local to
+ // Before giving up, see if we can prove the pointer non-null local to
// this particular block.
if (Val->getType()->isPointerTy() &&
isObjectDereferencedInBlock(Val, BB)) {
@@ -806,7 +875,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
return true;
}
-bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
PHINode *PN, BasicBlock *BB) {
LVILatticeVal Result; // Start Undefined.
@@ -845,15 +914,13 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
return true;
}
-static bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
- LVILatticeVal &Result,
- bool isTrueDest = true);
+static LVILatticeVal getValueFromCondition(Value *Val, Value *Cond,
+ bool isTrueDest = true);
// If we can determine a constraint on the value given conditions assumed by
// the program, intersect those constraints with BBLV
-void LazyValueInfoCache::intersectAssumeBlockValueConstantRange(Value *Val,
- LVILatticeVal &BBLV,
- Instruction *BBI) {
+void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
+ Value *Val, LVILatticeVal &BBLV, Instruction *BBI) {
BBI = BBI ? BBI : dyn_cast<Instruction>(Val);
if (!BBI)
return;
@@ -865,44 +932,52 @@ void LazyValueInfoCache::intersectAssumeBlockValueConstantRange(Value *Val,
if (!isValidAssumeForContext(I, BBI, DT))
continue;
- Value *C = I->getArgOperand(0);
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(C)) {
- LVILatticeVal Result;
- if (getValueFromFromCondition(Val, ICI, Result))
- BBLV = intersect(BBLV, Result);
- }
+ BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0)));
+ }
+
+ // If guards are not used in the module, don't spend time looking for them
+ auto *GuardDecl = BBI->getModule()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (!GuardDecl || GuardDecl->use_empty())
+ return;
+
+ for (Instruction &I : make_range(BBI->getIterator().getReverse(),
+ BBI->getParent()->rend())) {
+ Value *Cond = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond))))
+ BBLV = intersect(BBLV, getValueFromCondition(Val, Cond));
}
}
-bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
SelectInst *SI, BasicBlock *BB) {
// Recurse on our inputs if needed
if (!hasBlockValue(SI->getTrueValue(), BB)) {
if (pushBlockValue(std::make_pair(BB, SI->getTrueValue())))
return false;
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
LVILatticeVal TrueVal = getBlockValue(SI->getTrueValue(), BB);
// If we hit overdefined, don't ask more queries. We want to avoid poisoning
// extra slots in the table if we can.
if (TrueVal.isOverdefined()) {
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
if (!hasBlockValue(SI->getFalseValue(), BB)) {
if (pushBlockValue(std::make_pair(BB, SI->getFalseValue())))
return false;
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
LVILatticeVal FalseVal = getBlockValue(SI->getFalseValue(), BB);
// If we hit overdefined, don't ask more queries. We want to avoid poisoning
// extra slots in the table if we can.
if (FalseVal.isOverdefined()) {
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
@@ -916,22 +991,22 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
// ValueTracking getting smarter looking back past our immediate inputs.)
if (SelectPatternResult::isMinOrMax(SPR.Flavor) &&
LHS == SI->getTrueValue() && RHS == SI->getFalseValue()) {
- switch (SPR.Flavor) {
- default:
- llvm_unreachable("unexpected minmax type!");
- case SPF_SMIN: /// Signed minimum
- BBLV.markConstantRange(TrueCR.smin(FalseCR));
- return true;
- case SPF_UMIN: /// Unsigned minimum
- BBLV.markConstantRange(TrueCR.umin(FalseCR));
- return true;
- case SPF_SMAX: /// Signed maximum
- BBLV.markConstantRange(TrueCR.smax(FalseCR));
- return true;
- case SPF_UMAX: /// Unsigned maximum
- BBLV.markConstantRange(TrueCR.umax(FalseCR));
- return true;
- };
+ ConstantRange ResultCR = [&]() {
+ switch (SPR.Flavor) {
+ default:
+ llvm_unreachable("unexpected minmax type!");
+ case SPF_SMIN: /// Signed minimum
+ return TrueCR.smin(FalseCR);
+ case SPF_UMIN: /// Unsigned minimum
+ return TrueCR.umin(FalseCR);
+ case SPF_SMAX: /// Signed maximum
+ return TrueCR.smax(FalseCR);
+ case SPF_UMAX: /// Unsigned maximum
+ return TrueCR.umax(FalseCR);
+ };
+ }();
+ BBLV = LVILatticeVal::getRange(ResultCR);
+ return true;
}
// TODO: ABS, NABS from the SelectPatternResult
@@ -940,27 +1015,21 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
// Can we constrain the facts about the true and false values by using the
// condition itself? This shows up with idioms like e.g. select(a > 5, a, 5).
// TODO: We could potentially refine an overdefined true value above.
- if (auto *ICI = dyn_cast<ICmpInst>(SI->getCondition())) {
- LVILatticeVal TrueValTaken, FalseValTaken;
- if (!getValueFromFromCondition(SI->getTrueValue(), ICI,
- TrueValTaken, true))
- TrueValTaken.markOverdefined();
- if (!getValueFromFromCondition(SI->getFalseValue(), ICI,
- FalseValTaken, false))
- FalseValTaken.markOverdefined();
-
- TrueVal = intersect(TrueVal, TrueValTaken);
- FalseVal = intersect(FalseVal, FalseValTaken);
-
-
- // Handle clamp idioms such as:
- // %24 = constantrange<0, 17>
- // %39 = icmp eq i32 %24, 0
- // %40 = add i32 %24, -1
- // %siv.next = select i1 %39, i32 16, i32 %40
- // %siv.next = constantrange<0, 17> not <-1, 17>
- // In general, this can handle any clamp idiom which tests the edge
- // condition via an equality or inequality.
+ Value *Cond = SI->getCondition();
+ TrueVal = intersect(TrueVal,
+ getValueFromCondition(SI->getTrueValue(), Cond, true));
+ FalseVal = intersect(FalseVal,
+ getValueFromCondition(SI->getFalseValue(), Cond, false));
+
+ // Handle clamp idioms such as:
+ // %24 = constantrange<0, 17>
+ // %39 = icmp eq i32 %24, 0
+ // %40 = add i32 %24, -1
+ // %siv.next = select i1 %39, i32 16, i32 %40
+ // %siv.next = constantrange<0, 17> not <-1, 17>
+ // In general, this can handle any clamp idiom which tests the edge
+ // condition via an equality or inequality.
+ if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *A = ICI->getOperand(0);
if (ConstantInt *CIBase = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
@@ -1001,13 +1070,13 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
return true;
}
-bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
Instruction *BBI,
BasicBlock *BB) {
if (!BBI->getOperand(0)->getType()->isSized()) {
// Without knowing how wide the input is, we can't analyze it in any useful
// way.
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
@@ -1024,7 +1093,7 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
// Unhandled instructions are overdefined.
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown cast).\n");
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
@@ -1041,7 +1110,8 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
if (hasBlockValue(BBI->getOperand(0), BB)) {
LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI);
+ intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
+ BBI);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
@@ -1052,31 +1122,12 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
// NOTE: We're currently limited by the set of operations that ConstantRange
  // can evaluate symbolically. Enhancing that set will allow us to analyze
// more definitions.
- LVILatticeVal Result;
- switch (BBI->getOpcode()) {
- case Instruction::Trunc:
- Result.markConstantRange(LHSRange.truncate(ResultBitWidth));
- break;
- case Instruction::SExt:
- Result.markConstantRange(LHSRange.signExtend(ResultBitWidth));
- break;
- case Instruction::ZExt:
- Result.markConstantRange(LHSRange.zeroExtend(ResultBitWidth));
- break;
- case Instruction::BitCast:
- Result.markConstantRange(LHSRange);
- break;
- default:
- // Should be dead if the code above is correct
- llvm_unreachable("inconsistent with above");
- break;
- }
-
- BBLV = Result;
+ auto CastOp = (Instruction::CastOps) BBI->getOpcode();
+ BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth));
return true;
}
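ConstantRange::castOp, adopted above, dispatches on the cast opcode and subsumes the deleted per-opcode switch. A small worked example, assuming llvm/IR/ConstantRange.h:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

ConstantRange truncDemo() {
  ConstantRange Wide(APInt(32, 10), APInt(32, 20)); // i32 values in [10, 20)
  // Equivalent to Wide.truncate(8): every element fits in 8 bits, so the
  // result is the i8 range [10, 20).
  return Wide.castOp(Instruction::Trunc, /*BitWidth=*/8);
}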
-bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
Instruction *BBI,
BasicBlock *BB) {
@@ -1101,7 +1152,7 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// Unhandled instructions are overdefined.
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown binary operator).\n");
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
};
@@ -1118,7 +1169,8 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
if (hasBlockValue(BBI->getOperand(0), BB)) {
LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI);
+ intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
+ BBI);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
@@ -1129,82 +1181,114 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// NOTE: We're currently limited by the set of operations that ConstantRange
  // can evaluate symbolically. Enhancing that set will allow us to analyze
// more definitions.
- LVILatticeVal Result;
- switch (BBI->getOpcode()) {
- case Instruction::Add:
- Result.markConstantRange(LHSRange.add(RHSRange));
- break;
- case Instruction::Sub:
- Result.markConstantRange(LHSRange.sub(RHSRange));
- break;
- case Instruction::Mul:
- Result.markConstantRange(LHSRange.multiply(RHSRange));
- break;
- case Instruction::UDiv:
- Result.markConstantRange(LHSRange.udiv(RHSRange));
- break;
- case Instruction::Shl:
- Result.markConstantRange(LHSRange.shl(RHSRange));
- break;
- case Instruction::LShr:
- Result.markConstantRange(LHSRange.lshr(RHSRange));
- break;
- case Instruction::And:
- Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
- break;
- case Instruction::Or:
- Result.markConstantRange(LHSRange.binaryOr(RHSRange));
- break;
- default:
- // Should be dead if the code above is correct
- llvm_unreachable("inconsistent with above");
- break;
- }
-
- BBLV = Result;
+ auto BinOp = (Instruction::BinaryOps) BBI->getOpcode();
+ BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange));
return true;
}
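ConstantRange::binaryOp performs the same collapse for the binary operators. A small worked example under the same assumptions:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

ConstantRange addDemo() {
  ConstantRange LHS(APInt(8, 1), APInt(8, 10)); // i8 values in [1, 10)
  ConstantRange RHS(APInt(8, 2));               // the single value 2
  // Equivalent to LHS.add(RHS): every element shifts by 2, giving [3, 12).
  return LHS.binaryOp(Instruction::Add, RHS);
}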
-bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
- LVILatticeVal &Result, bool isTrueDest) {
- assert(ICI && "precondition");
- if (isa<Constant>(ICI->getOperand(1))) {
- if (ICI->isEquality() && ICI->getOperand(0) == Val) {
+static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
+ bool isTrueDest) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ CmpInst::Predicate Predicate = ICI->getPredicate();
+
+ if (isa<Constant>(RHS)) {
+ if (ICI->isEquality() && LHS == Val) {
// We know that V has the RHS constant if this is a true SETEQ or
// false SETNE.
- if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
- Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ if (isTrueDest == (Predicate == ICmpInst::ICMP_EQ))
+ return LVILatticeVal::get(cast<Constant>(RHS));
else
- Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
- return true;
+ return LVILatticeVal::getNot(cast<Constant>(RHS));
}
+ }
- // Recognize the range checking idiom that InstCombine produces.
- // (X-C1) u< C2 --> [C1, C1+C2)
- ConstantInt *NegOffset = nullptr;
- if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
- match(ICI->getOperand(0), m_Add(m_Specific(Val),
- m_ConstantInt(NegOffset)));
-
- ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
- if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
- // Calculate the range of values that are allowed by the comparison
- ConstantRange CmpRange(CI->getValue());
- ConstantRange TrueValues =
- ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange);
+ if (!Val->getType()->isIntegerTy())
+ return LVILatticeVal::getOverdefined();
+
+ // Use ConstantRange::makeAllowedICmpRegion in order to determine the possible
+  // range of Val guaranteed by the condition. Recognize comparisons in the form
+ // of:
+ // icmp <pred> Val, ...
+ // icmp <pred> (add Val, Offset), ...
+ // The latter is the range checking idiom that InstCombine produces. Subtract
+ // the offset from the allowed range for RHS in this case.
+
+  // Val or (add Val, Offset) can be on either side of the comparison.
+ if (LHS != Val && !match(LHS, m_Add(m_Specific(Val), m_ConstantInt()))) {
+ std::swap(LHS, RHS);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
- if (NegOffset) // Apply the offset from above.
- TrueValues = TrueValues.subtract(NegOffset->getValue());
+ ConstantInt *Offset = nullptr;
+ if (LHS != Val)
+ match(LHS, m_Add(m_Specific(Val), m_ConstantInt(Offset)));
+
+ if (LHS == Val || Offset) {
+ // Calculate the range of values that are allowed by the comparison
+ ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(),
+ /*isFullSet=*/true);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS))
+ RHSRange = ConstantRange(CI->getValue());
+ else if (Instruction *I = dyn_cast<Instruction>(RHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ RHSRange = getConstantRangeFromMetadata(*Ranges);
+
+ // If we're interested in the false dest, invert the condition
+ CmpInst::Predicate Pred =
+ isTrueDest ? Predicate : CmpInst::getInversePredicate(Predicate);
+ ConstantRange TrueValues =
+ ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
- // If we're interested in the false dest, invert the condition.
- if (!isTrueDest) TrueValues = TrueValues.inverse();
+ if (Offset) // Apply the offset from above.
+ TrueValues = TrueValues.subtract(Offset->getValue());
- Result = LVILatticeVal::getRange(std::move(TrueValues));
- return true;
- }
+ return LVILatticeVal::getRange(std::move(TrueValues));
}
- return false;
+ return LVILatticeVal::getOverdefined();
+}
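
An exhaustive 8-bit sanity check of the idiom documented above, in plain standalone C++ (no LLVM API involved): (X - C1) u< C2 holds exactly on the wrapped half-open interval [C1, C1+C2), including when C1+C2 wraps:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 200, C2 = 100;             // C1 + C2 wraps mod 256
      const uint8_t Lo = C1, Hi = uint8_t(C1 + C2); // half-open [Lo, Hi)
      for (unsigned V = 0; V <= 0xFF; ++V) {
        uint8_t X = uint8_t(V);
        bool Idiom = uint8_t(X - C1) < C2;            // (X - C1) u< C2
        bool InSet = Lo <= Hi ? (X >= Lo && X < Hi)   // ordinary interval
                              : (X >= Lo || X < Hi);  // wrapped interval
        assert(Idiom == InSet);
      }
      return 0;
    }
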
+
+static LVILatticeVal
+getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
+ DenseMap<Value*, LVILatticeVal> &Visited);
+
+static LVILatticeVal
+getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
+ DenseMap<Value*, LVILatticeVal> &Visited) {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
+ return getValueFromICmpCondition(Val, ICI, isTrueDest);
+
+  // Handle conditions in the form of (cond1 && cond2); we know that on the
+ // true dest path both of the conditions hold.
+ if (!isTrueDest)
+ return LVILatticeVal::getOverdefined();
+
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond);
+ if (!BO || BO->getOpcode() != BinaryOperator::And)
+ return LVILatticeVal::getOverdefined();
+
+ auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited);
+ auto LHS = getValueFromCondition(Val, BO->getOperand(1), isTrueDest, Visited);
+ return intersect(RHS, LHS);
+}
+
+static LVILatticeVal
+getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
+ DenseMap<Value*, LVILatticeVal> &Visited) {
+ auto I = Visited.find(Cond);
+ if (I != Visited.end())
+ return I->second;
+
+ auto Result = getValueFromConditionImpl(Val, Cond, isTrueDest, Visited);
+ Visited[Cond] = Result;
+ return Result;
+}
+
+LVILatticeVal getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest) {
+ assert(Cond && "precondition");
+ DenseMap<Value*, LVILatticeVal> Visited;
+ return getValueFromCondition(Val, Cond, isTrueDest, Visited);
}
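
The Visited map above memoizes one lattice value per condition node, so a condition DAG with shared subtrees is evaluated once per node rather than once per path. A minimal standalone sketch of the same pattern (Cond and Fact are illustrative stand-ins, not LLVM types, and boolean AND stands in for intersect):

    #include <cassert>
    #include <map>

    struct Cond {
      const Cond *L = nullptr, *R = nullptr; // both non-null: an 'and' node
      bool Fact = false;                     // leaf payload
    };

    static bool eval(const Cond *C, std::map<const Cond *, bool> &Visited) {
      auto It = Visited.find(C);
      if (It != Visited.end())
        return It->second;                   // shared subtree: reuse result
      bool R = C->L ? (eval(C->L, Visited) && eval(C->R, Visited)) : C->Fact;
      Visited[C] = R;
      return R;
    }

    int main() {
      Cond A{nullptr, nullptr, true}, B{nullptr, nullptr, true};
      Cond Shared{&A, &B}, Top{&Shared, &Shared};
      std::map<const Cond *, bool> Visited;
      assert(eval(&Top, Visited));
      assert(Visited.size() == 4); // A, B, Shared, Top: each evaluated once
      return 0;
    }
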
/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
@@ -1233,9 +1317,9 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if (getValueFromFromCondition(Val, ICI, Result, isTrueDest))
- return true;
+ Result = getValueFromCondition(Val, BI->getCondition(), isTrueDest);
+ if (!Result.isOverdefined())
+ return true;
}
}
@@ -1267,7 +1351,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
/// the basic block if the edge does not constrain Val.
-bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
BasicBlock *BBTo, LVILatticeVal &Result,
Instruction *CxtI) {
// If already a constant, there is nothing to compute.
@@ -1280,7 +1364,7 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
if (!getEdgeValueLocal(Val, BBFrom, BBTo, LocalResult))
// If we couldn't constrain the value on the edge, LocalResult doesn't
// provide any information.
- LocalResult.markOverdefined();
+ LocalResult = LVILatticeVal::getOverdefined();
if (hasSingleValue(LocalResult)) {
// Can't get any more precise here
@@ -1298,39 +1382,40 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// Try to intersect ranges of the BB and the constraint on the edge.
LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
- intersectAssumeBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator());
+ intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock,
+ BBFrom->getTerminator());
// We can use the context instruction (generically the ultimate instruction
// the calling pass is trying to simplify) here, even though the result of
// this function is generally cached when called from the solve* functions
// (and that cached result might be used with queries using a different
// context instruction), because when this function is called from the solve*
// functions, the context instruction is not provided. When called from
- // LazyValueInfoCache::getValueOnEdge, the context instruction is provided,
+ // LazyValueInfoImpl::getValueOnEdge, the context instruction is provided,
// but then the result is not cached.
- intersectAssumeBlockValueConstantRange(Val, InBlock, CxtI);
+ intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, CxtI);
Result = intersect(LocalResult, InBlock);
return true;
}
-LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,
+LVILatticeVal LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
assert(BlockValueStack.empty() && BlockValueSet.empty());
if (!hasBlockValue(V, BB)) {
- pushBlockValue(std::make_pair(BB, V));
+ pushBlockValue(std::make_pair(BB, V));
solve();
}
LVILatticeVal Result = getBlockValue(V, BB);
- intersectAssumeBlockValueConstantRange(V, Result, CxtI);
+ intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
-LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
+LVILatticeVal LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting value " << *V << " at '"
<< CxtI->getName() << "'\n");
@@ -1340,13 +1425,13 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
LVILatticeVal Result = LVILatticeVal::getOverdefined();
if (auto *I = dyn_cast<Instruction>(V))
Result = getFromRangeMetadata(I);
- intersectAssumeBlockValueConstantRange(V, Result, CxtI);
+ intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
-LVILatticeVal LazyValueInfoCache::
+LVILatticeVal LazyValueInfoImpl::
getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
@@ -1364,75 +1449,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
return Result;
}
-void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
- BasicBlock *NewSucc) {
- // When an edge in the graph has been threaded, values that we could not
- // determine a value for before (i.e. were marked overdefined) may be
- // possible to solve now. We do NOT try to proactively update these values.
- // Instead, we clear their entries from the cache, and allow lazy updating to
- // recompute them when needed.
-
- // The updating process is fairly simple: we need to drop cached info
- // for all values that were marked overdefined in OldSucc, and for those same
- // values in any successor of OldSucc (except NewSucc) in which they were
- // also marked overdefined.
- std::vector<BasicBlock*> worklist;
- worklist.push_back(OldSucc);
-
- auto I = OverDefinedCache.find(OldSucc);
- if (I == OverDefinedCache.end())
- return; // Nothing to process here.
- SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end());
-
- // Use a worklist to perform a depth-first search of OldSucc's successors.
- // NOTE: We do not need a visited list since any blocks we have already
- // visited will have had their overdefined markers cleared already, and we
- // thus won't loop to their successors.
- while (!worklist.empty()) {
- BasicBlock *ToUpdate = worklist.back();
- worklist.pop_back();
-
- // Skip blocks only accessible through NewSucc.
- if (ToUpdate == NewSucc) continue;
-
- bool changed = false;
- for (Value *V : ValsToClear) {
- // If a value was marked overdefined in OldSucc, and is here too...
- auto OI = OverDefinedCache.find(ToUpdate);
- if (OI == OverDefinedCache.end())
- continue;
- SmallPtrSetImpl<Value *> &ValueSet = OI->second;
- if (!ValueSet.count(V))
- continue;
-
- ValueSet.erase(V);
- if (ValueSet.empty())
- OverDefinedCache.erase(OI);
-
- // If we removed anything, then we potentially need to update
- // blocks successors too.
- changed = true;
- }
-
- if (!changed) continue;
-
- worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
- }
+void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ TheCache.threadEdgeImpl(OldSucc, NewSucc);
}
//===----------------------------------------------------------------------===//
// LazyValueInfo Impl
//===----------------------------------------------------------------------===//
-/// This lazily constructs the LazyValueInfoCache.
-static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC,
- const DataLayout *DL,
- DominatorTree *DT = nullptr) {
+/// This lazily constructs the LazyValueInfoImpl.
+static LazyValueInfoImpl &getImpl(void *&PImpl, AssumptionCache *AC,
+ const DataLayout *DL,
+ DominatorTree *DT = nullptr) {
if (!PImpl) {
assert(DL && "getCache() called with a null DataLayout");
- PImpl = new LazyValueInfoCache(AC, *DL, DT);
+ PImpl = new LazyValueInfoImpl(AC, *DL, DT);
}
- return *static_cast<LazyValueInfoCache*>(PImpl);
+ return *static_cast<LazyValueInfoImpl*>(PImpl);
}
bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
@@ -1445,7 +1479,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (Info.PImpl)
- getCache(Info.PImpl, Info.AC, &DL, Info.DT).clear();
+ getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear();
// Fully lazy.
return false;
@@ -1464,7 +1498,7 @@ LazyValueInfo::~LazyValueInfo() { releaseMemory(); }
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
- delete &getCache(PImpl, AC, nullptr);
+ delete &getImpl(PImpl, AC, nullptr);
PImpl = nullptr;
}
}
@@ -1479,11 +1513,29 @@ LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM)
return LazyValueInfo(&AC, &TLI, DT);
}
+/// Returns true if we can statically tell that this value will never be a
+/// "useful" constant. In practice, this means we've got something like an
+/// alloca or a malloc call for which a comparison against a constant can
+/// only be guarding dead code. Note that we are potentially giving up some
+/// precision in dead code (a constant result) in favour of avoiding an
+/// expensive search for an easily answered common query.
+static bool isKnownNonConstant(Value *V) {
+ V = V->stripPointerCasts();
+  // The return value of an alloca cannot be a Constant.
+ if (isa<AllocaInst>(V))
+ return true;
+ return false;
+}
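
A plain C++ illustration of the fast path above (nothing LLVM-specific): a pointer derived directly from an alloca can never compare equal to a meaningful constant, so the branch below only guards dead code and a full LVI search for it would be wasted effort:

    #include <cstdio>

    int main() {
      int Local = 0;    // becomes an alloca in IR
      int *P = &Local;
      if (P == nullptr) // provably false; only guards dead code
        puts("unreachable");
      return 0;
    }
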
+
Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
Instruction *CxtI) {
+ // Bail out early if V is known not to be a Constant.
+ if (isKnownNonConstant(V))
+ return nullptr;
+
const DataLayout &DL = BB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
+ getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1501,12 +1553,15 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB,
unsigned Width = V->getType()->getIntegerBitWidth();
const DataLayout &DL = BB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
- assert(!Result.isConstant());
+ getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isUndefined())
return ConstantRange(Width, /*isFullSet=*/false);
if (Result.isConstantRange())
return Result.getConstantRange();
+  // We represent ConstantInt constants as constant ranges, but other kinds
+  // of integer constants, e.g. ConstantExpr, will be tagged as constants.
+ assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
+ "ConstantInt value must be represented as constantrange");
return ConstantRange(Width, /*isFullSet=*/true);
}
@@ -1517,7 +1572,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
Instruction *CxtI) {
const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1564,8 +1619,8 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
}
// Handle more complex predicates.
- ConstantRange TrueValues =
- ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+ ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
+ (ICmpInst::Predicate)Pred, CI->getValue());
if (TrueValues.contains(CR))
return LazyValueInfo::True;
if (TrueValues.inverse().contains(CR))
@@ -1605,7 +1660,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
@@ -1613,8 +1668,19 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
LazyValueInfo::Tristate
LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
+  // Whether a pointer is or is not null is a commonly queried predicate. If
+  // isKnownNonNull can tell us the result of the predicate, we can
+  // return it quickly. But this is only a fast path, and falling
+ // through would still be correct.
+ if (V->getType()->isPointerTy() && C->isNullValue() &&
+ isKnownNonNull(V->stripPointerCasts())) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ return LazyValueInfo::False;
+ else if (Pred == ICmpInst::ICMP_NE)
+ return LazyValueInfo::True;
+ }
const DataLayout &DL = CxtI->getModule()->getDataLayout();
- LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
+ LVILatticeVal Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI);
if (Ret != Unknown)
return Ret;
@@ -1673,7 +1739,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
}
if (Baseline != Unknown)
return Baseline;
- }
+ }
  // For a comparison where V is outside this block, it's possible
// that we've branched on it before. Look to see if the value is known
@@ -1704,13 +1770,13 @@ void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
if (PImpl) {
const DataLayout &DL = PredBB->getModule()->getDataLayout();
- getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ getImpl(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
}
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
if (PImpl) {
const DataLayout &DL = BB->getModule()->getDataLayout();
- getCache(PImpl, AC, &DL, DT).eraseBlock(BB);
+ getImpl(PImpl, AC, &DL, DT).eraseBlock(BB);
}
}
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index fdf5f55dab9f..2ca46b1fe872 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -35,27 +35,48 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Lint.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <string>
+
using namespace llvm;
namespace {
@@ -64,7 +85,7 @@ namespace {
static const unsigned Write = 2;
static const unsigned Callee = 4;
static const unsigned Branchee = 8;
- }
+ } // end namespace MemRef
class Lint : public FunctionPass, public InstVisitor<Lint> {
friend class InstVisitor<Lint>;
@@ -159,7 +180,7 @@ namespace {
WriteValues({V1, Vs...});
}
};
-}
+} // end anonymous namespace
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -173,7 +194,7 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
// Assert - We know that cond should be true, if not print an error message.
#define Assert(C, ...) \
- do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false)
// Lint::run - This is the main Analysis entry point for a
// function.
@@ -680,9 +701,9 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
return findValueImpl(W, OffsetOk, Visited);
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI))
- if (W != V)
+ } else if (auto *C = dyn_cast<Constant>(V)) {
+ if (Value *W = ConstantFoldConstant(C, *DL, TLI))
+ if (W && W != V)
return findValueImpl(W, OffsetOk, Visited);
}
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 75426b54195a..e46541e6538d 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -55,6 +55,10 @@ static bool isDereferenceableAndAlignedPointer(
const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL,
const Instruction *CtxI, const DominatorTree *DT,
SmallPtrSetImpl<const Value *> &Visited) {
+  // Already visited? Bail out; we've likely hit unreachable code.
+ if (!Visited.insert(V).second)
+ return false;
+
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
@@ -87,9 +91,11 @@ static bool isDereferenceableAndAlignedPointer(
// then the GEP (== Base + Offset == k_0 * Align + k_1 * Align) is also
// aligned to Align bytes.
- return Visited.insert(Base).second &&
- isDereferenceableAndAlignedPointer(Base, Align, Offset + Size, DL,
- CtxI, DT, Visited);
+ // Offset and Size may have different bit widths if we have visited an
+ // addrspacecast, so we can't do arithmetic directly on the APInt values.
+ return isDereferenceableAndAlignedPointer(
+ Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()),
+ DL, CtxI, DT, Visited);
}
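
A minimal sketch of the width hazard the comment above describes, assuming LLVM headers are available (the helper name is made up for illustration): APInt arithmetic asserts when the operand widths differ, which is why Size is normalized to Offset's width before the addition:

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // Offset may be, say, 32 bits wide (an address space with narrow
    // pointers) while Size is 64 bits; Offset + Size would assert.
    static APInt neededBytes(const APInt &Offset, const APInt &Size) {
      return Offset + Size.sextOrTrunc(Offset.getBitWidth());
    }
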
// For gc.relocate, look through relocations
@@ -302,11 +308,11 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
"to scan backward from a given instruction, when searching for "
"available loaded value"));
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
+ BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA, AAMDNodes *AATags,
- bool *IsLoadCSE) {
+ AliasAnalysis *AA, bool *IsLoadCSE) {
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
@@ -356,8 +362,6 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
if (LI->isAtomic() < Load->isAtomic())
return nullptr;
- if (AATags)
- LI->getAAMetadata(*AATags);
if (IsLoadCSE)
*IsLoadCSE = true;
return LI;
@@ -377,8 +381,8 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
if (SI->isAtomic() < Load->isAtomic())
return nullptr;
- if (AATags)
- SI->getAAMetadata(*AATags);
+ if (IsLoadCSE)
+ *IsLoadCSE = false;
return SI->getOperand(0);
}
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 5214eb7c051c..2f3dca3d23fa 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -12,19 +12,61 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "loop-accesses"
@@ -94,14 +136,18 @@ bool VectorizerParams::isInterleaveForced() {
}
void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
- const Function *TheFunction,
- const Loop *TheLoop,
- const char *PassName) {
+ const Loop *TheLoop, const char *PassName,
+ OptimizationRemarkEmitter &ORE) {
DebugLoc DL = TheLoop->getStartLoc();
- if (const Instruction *I = Message.getInstr())
- DL = I->getDebugLoc();
- emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
- *TheFunction, DL, Message.str());
+ const Value *V = TheLoop->getHeader();
+ if (const Instruction *I = Message.getInstr()) {
+    // If there is no debug location attached to the instruction, fall back to
+    // the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ V = I->getParent();
+ }
+ ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str());
}
Value *llvm::stripIntegerCast(Value *V) {
@@ -463,6 +509,7 @@ void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
}
namespace {
+
/// \brief Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
@@ -886,7 +933,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
/// \brief Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
const Loop *Lp, const ValueToValueMap &StridesMap,
- bool Assume) {
+ bool Assume, bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -925,9 +972,9 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// to access the pointer value "0" which is undefined behavior in address
// space 0, therefore we can also vectorize this case.
bool IsInBoundsGEP = isInBoundsGep(Ptr);
- bool IsNoWrapAddRec =
- PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
- isNoWrapAddRec(Ptr, AR, PSE, Lp);
+ bool IsNoWrapAddRec = !ShouldCheckWrap ||
+ PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
+ isNoWrapAddRec(Ptr, AR, PSE, Lp);
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
if (Assume) {
@@ -1028,8 +1075,8 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return false;
// Make sure that A and B have the same type if required.
- if(CheckType && PtrA->getType() != PtrB->getType())
- return false;
+ if (CheckType && PtrA->getType() != PtrB->getType())
+ return false;
unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
@@ -1451,7 +1498,7 @@ MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
auto &IndexVector = Accesses.find(Access)->second;
SmallVector<Instruction *, 4> Insts;
- std::transform(IndexVector.begin(), IndexVector.end(),
+ transform(IndexVector,
std::back_inserter(Insts),
[&](unsigned Idx) { return this->InstMap[Idx]; });
return Insts;
@@ -1478,25 +1525,23 @@ bool LoopAccessInfo::canAnalyzeLoop() {
// We can only analyze innermost loops.
if (!TheLoop->empty()) {
DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
- emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
+ recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
return false;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- emitAnalysis(
- LoopAccessReport() <<
- "loop control flow is not understood by analyzer");
+ recordAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by analyzer";
return false;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- emitAnalysis(
- LoopAccessReport() <<
- "loop control flow is not understood by analyzer");
+ recordAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by analyzer";
return false;
}
@@ -1505,17 +1550,16 @@ bool LoopAccessInfo::canAnalyzeLoop() {
// instructions in the loop are executed the same number of times.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- emitAnalysis(
- LoopAccessReport() <<
- "loop control flow is not understood by analyzer");
+ recordAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by analyzer";
return false;
}
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = PSE->getBackedgeTakenCount();
if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
- emitAnalysis(LoopAccessReport()
- << "could not determine number of loop iterations");
+ recordAnalysis("CantComputeNumberOfIterations")
+ << "could not determine number of loop iterations";
DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -1564,8 +1608,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
auto *Ld = dyn_cast<LoadInst>(&I);
if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
- emitAnalysis(LoopAccessReport(Ld)
- << "read with atomic ordering or volatile read");
+ recordAnalysis("NonSimpleLoad", Ld)
+ << "read with atomic ordering or volatile read";
DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
CanVecMem = false;
return;
@@ -1582,14 +1626,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
if (I.mayWriteToMemory()) {
auto *St = dyn_cast<StoreInst>(&I);
if (!St) {
- emitAnalysis(LoopAccessReport(St)
- << "instruction cannot be vectorized");
+ recordAnalysis("CantVectorizeInstruction", St)
+ << "instruction cannot be vectorized";
CanVecMem = false;
return;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
- emitAnalysis(LoopAccessReport(St)
- << "write with atomic ordering or volatile write");
+ recordAnalysis("NonSimpleStore", St)
+ << "write with atomic ordering or volatile write";
DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
CanVecMem = false;
return;
@@ -1697,7 +1741,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
TheLoop, SymbolicStrides);
if (!CanDoRTIfNeeded) {
- emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
+ recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n");
CanVecMem = false;
@@ -1728,8 +1772,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// Check that we found the bounds for the pointer.
if (!CanDoRTIfNeeded) {
- emitAnalysis(LoopAccessReport()
- << "cannot check memory dependencies at runtime");
+ recordAnalysis("CantCheckMemDepsAtRunTime")
+ << "cannot check memory dependencies at runtime";
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
CanVecMem = false;
return;
@@ -1744,12 +1788,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
<< (PtrRtChecking->Need ? "" : " don't")
<< " need runtime memory checks.\n");
else {
- emitAnalysis(
- LoopAccessReport()
+ recordAnalysis("UnsafeMemDep")
<< "unsafe dependent memory operations in loop. Use "
"#pragma loop distribute(enable) to allow loop distribution "
"to attempt to isolate the offending operations into a separate "
- "loop");
+ "loop";
DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
}
}
@@ -1763,13 +1806,35 @@ bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
return !DT->dominates(BB, Latch);
}
-void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
+OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
+ Instruction *I) {
assert(!Report && "Multiple reports generated");
- Report = Message;
+
+ Value *CodeRegion = TheLoop->getHeader();
+ DebugLoc DL = TheLoop->getStartLoc();
+
+ if (I) {
+ CodeRegion = I->getParent();
+    // If there is no debug location attached to the instruction, fall back to
+    // the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ }
+
+ Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
+ CodeRegion);
+ return *Report;
}
bool LoopAccessInfo::isUniform(Value *V) const {
- return (PSE->getSE()->isLoopInvariant(PSE->getSE()->getSCEV(V), TheLoop));
+ auto *SE = PSE->getSE();
+ // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
+ // never considered uniform.
+ // TODO: Is this really what we want? Even without FP SCEV, we may want some
+ // trivially loop-invariant FP values to be considered uniform.
+ if (!SE->isSCEVable(V->getType()))
+ return false;
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}
// FIXME: this function is currently a duplicate of the one in
@@ -1784,6 +1849,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
}
namespace {
+
/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
/// need to use value-handles because SCEV expansion can invalidate previously
/// expanded values. Thus expansion of a pointer can invalidate the bounds for
@@ -1792,6 +1858,7 @@ struct PointerBounds {
TrackingVH<Value> Start;
TrackingVH<Value> End;
};
+
} // end anonymous namespace
/// \brief Expand code for the lower and upper bound of the pointer group \p CG
@@ -1803,18 +1870,24 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
const SCEV *Sc = SE->getSCEV(Ptr);
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ LLVMContext &Ctx = Loc->getContext();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+
if (SE->isLoopInvariant(Sc, TheLoop)) {
DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
<< "\n");
- return {Ptr, Ptr};
+ // Ptr could be in the loop body. If so, expand a new one at the correct
+ // location.
+ Instruction *Inst = dyn_cast<Instruction>(Ptr);
+ Value *NewPtr = (Inst && TheLoop->contains(Inst))
+ ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
+ : Ptr;
+ return {NewPtr, NewPtr};
} else {
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
- LLVMContext &Ctx = Loc->getContext();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
Value *Start = nullptr, *End = nullptr;
-
DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
@@ -1833,9 +1906,8 @@ static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
// Here we're relying on the SCEV Expander's cache to only emit code for the
// same bounds once.
- std::transform(
- PointerChecks.begin(), PointerChecks.end(),
- std::back_inserter(ChecksWithBounds),
+ transform(
+ PointerChecks, std::back_inserter(ChecksWithBounds),
[&](const RuntimePointerChecking::PointerCheck &Check) {
PointerBounds
First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
@@ -1967,7 +2039,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
}
if (Report)
- OS.indent(Depth) << "Report: " << Report->str() << "\n";
+ OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
if (auto *Dependences = DepChecker->getDependences()) {
OS.indent(Depth) << "Dependences:\n";
@@ -2046,10 +2118,10 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
-char LoopAccessAnalysis::PassID;
+AnalysisKey LoopAccessAnalysis::Key;
-LoopAccessInfo LoopAccessAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
- const AnalysisManager<Function> &FAM =
+LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM) {
+ const FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function &F = *L.getHeader()->getParent();
auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(F);
@@ -2070,7 +2142,7 @@ LoopAccessInfo LoopAccessAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
}
PreservedAnalyses LoopAccessInfoPrinterPass::run(Loop &L,
- AnalysisManager<Loop> &AM) {
+ LoopAnalysisManager &AM) {
Function &F = *L.getHeader()->getParent();
auto &LAI = AM.getResult<LoopAccessAnalysis>(L);
OS << "Loop access info in function '" << F.getName() << "':\n";
@@ -2080,7 +2152,9 @@ PreservedAnalyses LoopAccessInfoPrinterPass::run(Loop &L,
}
namespace llvm {
+
Pass *createLAAPass() {
return new LoopAccessLegacyAnalysis();
}
-}
+
+} // end namespace llvm
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 30f7ef392422..19c0171740c9 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -143,42 +143,47 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return nullptr;
}
-bool Loop::isLCSSAForm(DominatorTree &DT) const {
- for (BasicBlock *BB : this->blocks()) {
- for (Instruction &I : *BB) {
- // Tokens can't be used in PHI nodes and live-out tokens prevent loop
- // optimizations, so for the purposes of considered LCSSA form, we
- // can ignore them.
- if (I.getType()->isTokenTy())
- continue;
+// Check that the instructions in 'BB' don't have any uses outside of 'L'.
+static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
+ DominatorTree &DT) {
+ for (const Instruction &I : BB) {
+ // Tokens can't be used in PHI nodes and live-out tokens prevent loop
+  // optimizations, so for the purposes of considering LCSSA form, we
+ // can ignore them.
+ if (I.getType()->isTokenTy())
+ continue;
- for (Use &U : I.uses()) {
- Instruction *UI = cast<Instruction>(U.getUser());
- BasicBlock *UserBB = UI->getParent();
- if (PHINode *P = dyn_cast<PHINode>(UI))
- UserBB = P->getIncomingBlock(U);
-
- // Check the current block, as a fast-path, before checking whether
- // the use is anywhere in the loop. Most values are used in the same
- // block they are defined in. Also, blocks not reachable from the
- // entry are special; uses in them don't need to go through PHIs.
- if (UserBB != BB &&
- !contains(UserBB) &&
- DT.isReachableFromEntry(UserBB))
- return false;
- }
+ for (const Use &U : I.uses()) {
+ const Instruction *UI = cast<Instruction>(U.getUser());
+ const BasicBlock *UserBB = UI->getParent();
+ if (const PHINode *P = dyn_cast<PHINode>(UI))
+ UserBB = P->getIncomingBlock(U);
+
+ // Check the current block, as a fast-path, before checking whether
+ // the use is anywhere in the loop. Most values are used in the same
+ // block they are defined in. Also, blocks not reachable from the
+ // entry are special; uses in them don't need to go through PHIs.
+ if (UserBB != &BB && !L.contains(UserBB) &&
+ DT.isReachableFromEntry(UserBB))
+ return false;
}
}
-
return true;
}
-bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const {
- if (!isLCSSAForm(DT))
- return false;
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+  // For each block, check that its instructions have no uses outside this loop.
+ return all_of(this->blocks(), [&](const BasicBlock *BB) {
+ return isBlockInLCSSAForm(*this, *BB, DT);
+ });
+}
- return std::all_of(begin(), end(), [&](const Loop *L) {
- return L->isRecursivelyLCSSAForm(DT);
+bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const {
+  // For each block we check that its instructions don't have any uses outside
+  // of its innermost loop. This process transitively guarantees that the
+  // current loop and all of the nested loops are in LCSSA form.
+ return all_of(this->blocks(), [&](const BasicBlock *BB) {
+ return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT);
});
}
@@ -300,23 +305,40 @@ bool Loop::isAnnotatedParallel() const {
}
DebugLoc Loop::getStartLoc() const {
+ return getLocRange().getStart();
+}
+
+Loop::LocRange Loop::getLocRange() const {
// If we have a debug location in the loop ID, then use it.
- if (MDNode *LoopID = getLoopID())
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i)
- if (DILocation *L = dyn_cast<DILocation>(LoopID->getOperand(i)))
- return DebugLoc(L);
+ if (MDNode *LoopID = getLoopID()) {
+ DebugLoc Start;
+ // We use the first DebugLoc in the header as the start location of the loop
+    // and, if there is a second DebugLoc in the header, we use it as the end
+    // location of the loop.
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ if (DILocation *L = dyn_cast<DILocation>(LoopID->getOperand(i))) {
+ if (!Start)
+ Start = DebugLoc(L);
+ else
+ return LocRange(Start, DebugLoc(L));
+ }
+ }
+
+ if (Start)
+ return LocRange(Start);
+ }
// Try the pre-header first.
if (BasicBlock *PHeadBB = getLoopPreheader())
if (DebugLoc DL = PHeadBB->getTerminator()->getDebugLoc())
- return DL;
+ return LocRange(DL);
// If we have no pre-header or there are no instructions with debug
// info in it, try the header.
if (BasicBlock *HeadBB = getHeader())
- return HeadBB->getTerminator()->getDebugLoc();
+ return LocRange(HeadBB->getTerminator()->getDebugLoc());
- return DebugLoc();
+ return LocRange();
}
bool Loop::hasDedicatedExits() const {
@@ -366,8 +388,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
// In case of multiple edges from current block to exit block, collect
// only one edge in ExitBlocks. Use switchExitBlocks to keep track of
// duplicate edges.
- if (std::find(SwitchExitBlocks.begin(), SwitchExitBlocks.end(), Successor)
- == SwitchExitBlocks.end()) {
+ if (!is_contained(SwitchExitBlocks, Successor)) {
SwitchExitBlocks.push_back(Successor);
ExitBlocks.push_back(Successor);
}
@@ -387,6 +408,10 @@ BasicBlock *Loop::getUniqueExitBlock() const {
LLVM_DUMP_METHOD void Loop::dump() const {
print(dbgs());
}
+
+LLVM_DUMP_METHOD void Loop::dumpVerbose() const {
+ print(dbgs(), /*Depth=*/ 0, /*Verbose=*/ true);
+}
#endif
//===----------------------------------------------------------------------===//
@@ -532,8 +557,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
assert(Subloop && "subloop is not an ancestor of the original loop");
}
// Get the current nearest parent of the Subloop exits, initially Unloop.
- NearLoop =
- SubloopParents.insert(std::make_pair(Subloop, &Unloop)).first->second;
+ NearLoop = SubloopParents.insert({Subloop, &Unloop}).first->second;
}
succ_iterator I = succ_begin(BB), E = succ_end(BB);
@@ -645,9 +669,9 @@ void LoopInfo::markAsRemoved(Loop *Unloop) {
}
}
-char LoopAnalysis::PassID;
+AnalysisKey LoopAnalysis::Key;
-LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+LoopInfo LoopAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
// FIXME: Currently we create a LoopInfo from scratch for every function.
// This may prove to be too wasteful due to deallocating and re-allocating
// memory each time for the underlying map and vector datastructures. At some
@@ -660,7 +684,7 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
}
PreservedAnalyses LoopPrinterPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
AM.getResult<LoopAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
@@ -702,8 +726,10 @@ void LoopInfoWrapperPass::verifyAnalysis() const {
// -verify-loop-info option can enable this. In order to perform some
// checking by default, LoopPass has been taught to call verifyLoop manually
// during loop pass sequences.
- if (VerifyLoopInfo)
- LI.verify();
+ if (VerifyLoopInfo) {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI.verify(DT);
+ }
}
void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -715,6 +741,14 @@ void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
LI.print(OS);
}
+PreservedAnalyses LoopVerifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ LI.verify(DT);
+ return PreservedAnalyses::all();
+}
+
//===----------------------------------------------------------------------===//
// LoopBlocksDFS implementation
//
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 222345c9a980..b5b8040984d7 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"
@@ -48,7 +49,7 @@ public:
[](BasicBlock *BB) { return BB; });
if (BBI != L->blocks().end() &&
isFunctionInPrintList((*BBI)->getParent()->getName())) {
- AnalysisManager<Loop> DummyLAM;
+ LoopAnalysisManager DummyLAM;
P.run(*L, DummyLAM);
}
return false;
@@ -131,8 +132,8 @@ void LPPassManager::deleteSimpleAnalysisLoop(Loop *L) {
// Recurse through all subloops and all loops into LQ.
static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
LQ.push_back(L);
- for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
- addLoopIntoQueue(*I, LQ);
+ for (Loop *I : reverse(*L))
+ addLoopIntoQueue(I, LQ);
}
/// Pass Manager itself does not invalidate any analysis info.
@@ -140,6 +141,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
// LPPassManager needs LoopInfo. In the long term LoopInfo class will
// become part of LPPassManager.
Info.addRequired<LoopInfoWrapperPass>();
+ Info.addRequired<DominatorTreeWrapperPass>();
Info.setPreservesAll();
}
@@ -148,6 +150,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
bool LPPassManager::runOnFunction(Function &F) {
auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
LI = &LIWP.getLoopInfo();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
bool Changed = false;
// Collect inherited analysis from Module level pass manager.
@@ -162,16 +165,14 @@ bool LPPassManager::runOnFunction(Function &F) {
// Note that LoopInfo::iterator visits loops in reverse program
// order. Here, reverse_iterator gives us a forward order, and the LoopQueue
// reverses the order a third time by popping from the back.
- for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
- addLoopIntoQueue(*I, LQ);
+ for (Loop *L : reverse(*LI))
+ addLoopIntoQueue(L, LQ);
if (LQ.empty()) // No loops, skip calling finalizers
return false;
// Initialization
- for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
- I != E; ++I) {
- Loop *L = *I;
+ for (Loop *L : LQ) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
Changed |= P->doInitialization(L, *this);
@@ -220,6 +221,12 @@ bool LPPassManager::runOnFunction(Function &F) {
TimeRegion PassTimer(getPassTimer(&LIWP));
CurrentLoop->verifyLoop();
}
+      // Here we apply the same reasoning as in the above case. The only
+      // difference is that LPPassManager might run passes which do not
+      // require LCSSA form (LoopPassPrinter for example). We should skip
+      // verification for such passes.
+ if (mustPreserveAnalysisID(LCSSAVerificationPass::ID))
+ CurrentLoop->isRecursivelyLCSSAForm(*DT, *LI);
// Then call the regular verifyAnalysis functions.
verifyPreservedAnalysis(P);
@@ -355,3 +362,8 @@ bool LoopPass::skipLoop(const Loop *L) const {
}
return false;
}
+
+char LCSSAVerificationPass::ID = 0;
+INITIALIZE_PASS(LCSSAVerificationPass, "lcssa-verification", "LCSSA Verifier",
+ false, false)
+
diff --git a/lib/Analysis/LoopPassManager.cpp b/lib/Analysis/LoopPassManager.cpp
index 8bac19a58217..044e5d55dafd 100644
--- a/lib/Analysis/LoopPassManager.cpp
+++ b/lib/Analysis/LoopPassManager.cpp
@@ -17,12 +17,31 @@
using namespace llvm;
-// Explicit instantiations for core typedef'ed templates.
+// Explicit template instantiations and specialization definitions for core
+// template typedefs.
namespace llvm {
template class PassManager<Loop>;
template class AnalysisManager<Loop>;
template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>;
template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop>;
+
+template <>
+bool LoopAnalysisManagerFunctionProxy::Result::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // If this proxy isn't marked as preserved, the set of Function objects in
+ // the module may have changed. We therefore can't call
+ // InnerAM->invalidate(), because any pointers to Functions it has may be
+ // stale.
+ auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Loop>>())
+ InnerAM->clear();
+
+  // FIXME: Proper support for invalidation isn't yet implemented for the LPM.
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
}
PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
@@ -32,6 +51,7 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<ScalarEvolutionAnalysis>();
// TODO: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
+ PA.preserve<AAManager>();
PA.preserve<BasicAA>();
PA.preserve<GlobalsAA>();
PA.preserve<SCEVAA>();
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index f23477622bec..2d8274040d39 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -77,8 +77,12 @@ static const std::pair<LibFunc::Func, AllocFnsTy> AllocationFnData[] = {
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
+static Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
+ bool &IsNoBuiltin) {
+ // Don't care about intrinsics in this case.
+ if (isa<IntrinsicInst>(V))
+ return nullptr;
-static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
if (LookThroughBitCast)
V = V->stripPointerCasts();
@@ -86,8 +90,7 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
if (!CS.getInstruction())
return nullptr;
- if (CS.isNoBuiltin())
- return nullptr;
+ IsNoBuiltin = CS.isNoBuiltin();
Function *Callee = CS.getCalledFunction();
if (!Callee || !Callee->isDeclaration())
@@ -98,47 +101,19 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
/// Returns the allocation data for the given value if it's either a call to a
/// known allocation function, or a call to a function with the allocsize
/// attribute.
-static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
- const TargetLibraryInfo *TLI,
- bool LookThroughBitCast = false) {
- // Skip intrinsics
- if (isa<IntrinsicInst>(V))
- return None;
-
- const Function *Callee = getCalledFunction(V, LookThroughBitCast);
- if (!Callee)
- return None;
-
- // If it has allocsize, we can skip checking if it's a known function.
- //
- // MallocLike is chosen here because allocsize makes no guarantees about the
- // nullness of the result of the function, nor does it deal with strings, nor
- // does it require that the memory returned is zeroed out.
- LLVM_CONSTEXPR auto AllocSizeAllocTy = MallocLike;
- if ((AllocTy & AllocSizeAllocTy) == AllocSizeAllocTy &&
- Callee->hasFnAttribute(Attribute::AllocSize)) {
- Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize);
- std::pair<unsigned, Optional<unsigned>> Args = Attr.getAllocSizeArgs();
-
- AllocFnsTy Result;
- Result.AllocTy = AllocSizeAllocTy;
- Result.NumParams = Callee->getNumOperands();
- Result.FstParam = Args.first;
- Result.SndParam = Args.second.getValueOr(-1);
- return Result;
- }
-
+static Optional<AllocFnsTy>
+getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
+ const TargetLibraryInfo *TLI) {
// Make sure that the function is available.
StringRef FnName = Callee->getName();
LibFunc::Func TLIFn;
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return None;
- const auto *Iter =
- std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData),
- [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) {
- return P.first == TLIFn;
- });
+ const auto *Iter = find_if(
+ AllocationFnData, [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) {
+ return P.first == TLIFn;
+ });
if (Iter == std::end(AllocationFnData))
return None;
@@ -164,6 +139,48 @@ static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
return None;
}
+static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
+ const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false) {
+ bool IsNoBuiltinCall;
+ if (const Function *Callee =
+ getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall))
+ if (!IsNoBuiltinCall)
+ return getAllocationDataForFunction(Callee, AllocTy, TLI);
+ return None;
+}
+
+static Optional<AllocFnsTy> getAllocationSize(const Value *V,
+ const TargetLibraryInfo *TLI) {
+ bool IsNoBuiltinCall;
+ const Function *Callee =
+ getCalledFunction(V, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
+ if (!Callee)
+ return None;
+
+ // Prefer to use existing information over allocsize. This will give us an
+ // accurate AllocTy.
+ if (!IsNoBuiltinCall)
+ if (Optional<AllocFnsTy> Data =
+ getAllocationDataForFunction(Callee, AnyAlloc, TLI))
+ return Data;
+
+ Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize);
+ if (Attr == Attribute())
+ return None;
+
+ std::pair<unsigned, Optional<unsigned>> Args = Attr.getAllocSizeArgs();
+
+ AllocFnsTy Result;
+ // Because allocsize only tells us how many bytes are allocated, we're not
+ // really allowed to assume anything, so we use MallocLike.
+ Result.AllocTy = MallocLike;
+ Result.NumParams = Callee->getNumOperands();
+ Result.FstParam = Args.first;
+ Result.SndParam = Args.second.getValueOr(-1);
+ return Result;
+}
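
For context on the allocsize fallback above, a hedged source-level example (the allocator name is hypothetical; requires Clang's attribute syntax): the C-level alloc_size attribute uses 1-based parameter indices and is lowered to the 0-based IR allocsize attribute that this code consults:

    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>

    // alloc_size(1): parameter 1 is the byte count; Clang emits the IR
    // attribute allocsize(0) on this function.
    __attribute__((alloc_size(1))) void *my_alloc(size_t NBytes) {
      return std::malloc(NBytes);
    }

    int main() {
      void *P = my_alloc(42);
      // With optimization enabled this folds to 42 via allocsize, even
      // though my_alloc is not a known TLI allocation function.
      printf("%zu\n", __builtin_object_size(P, 0));
      std::free(P);
      return 0;
    }
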
+
static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V);
return CS && CS.paramHasAttr(AttributeSet::ReturnIndex, Attribute::NoAlias);
@@ -389,6 +406,36 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
return true;
}
+ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ bool MustSucceed) {
+ assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
+ "ObjectSize must be a call to llvm.objectsize!");
+
+ bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero();
+ ObjSizeMode Mode;
+ // Unless we have to fold this to something, try to be as accurate as
+ // possible.
+ if (MustSucceed)
+ Mode = MaxVal ? ObjSizeMode::Max : ObjSizeMode::Min;
+ else
+ Mode = ObjSizeMode::Exact;
+
+ // FIXME: Does it make sense to just return a failure value if the size won't
+ // fit in the output and `!MustSucceed`?
+ uint64_t Size;
+ auto *ResultType = cast<IntegerType>(ObjectSize->getType());
+ if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, false, Mode) &&
+ isUIntN(ResultType->getBitWidth(), Size))
+ return ConstantInt::get(ResultType, Size);
+
+ if (!MustSucceed)
+ return nullptr;
+
+ return ConstantInt::get(ResultType, MaxVal ? -1ULL : 0);
+}
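
The MaxVal/Min fallback constants above are observable through the Clang builtin that lowers to llvm.objectsize; a small hedged demo of the documented __builtin_object_size behavior:

    #include <cstddef>
    #include <cstdio>

    char Buf[64];

    // Inside these functions the pointee is unknown, so folding must use
    // the mode's failure constant, mirroring `MaxVal ? -1ULL : 0` above:
    // max mode (type 0) yields (size_t)-1, min mode (type 2) yields 0.
    __attribute__((noinline)) size_t unknownMax(char *P) {
      return __builtin_object_size(P, 0);
    }
    __attribute__((noinline)) size_t unknownMin(char *P) {
      return __builtin_object_size(P, 2);
    }

    int main() {
      printf("%zu\n", __builtin_object_size(Buf, 0));        // known object: 64
      printf("%zu %zu\n", unknownMax(Buf), unknownMin(Buf)); // (size_t)-1 and 0
      return 0;
    }
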
+
STATISTIC(ObjectVisitorArgument,
"Number of arguments with unsolved size and offset");
STATISTIC(ObjectVisitorLoad,
@@ -476,8 +523,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
}
SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
- Optional<AllocFnsTy> FnData =
- getAllocationData(CS.getInstruction(), AnyAlloc, TLI);
+ Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI);
if (!FnData)
return unknown();
@@ -736,8 +782,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) {
- Optional<AllocFnsTy> FnData =
- getAllocationData(CS.getInstruction(), AnyAlloc, TLI);
+ Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI);
if (!FnData)
return unknown();
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 33499334fefa..2746361ab4b5 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -15,24 +15,38 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+
using namespace llvm;
#define DEBUG_TYPE "memdep"
@@ -166,7 +180,7 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
BasicBlock *BB) {
unsigned Limit = BlockScanLimit;
- // Walk backwards through the block, looking for dependencies
+ // Walk backwards through the block, looking for dependencies.
while (ScanIt != BB->begin()) {
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
@@ -220,26 +234,6 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
return MemDepResult::getNonFuncLocal();
}
-/// Return true if LI is a load that would fully overlap MemLoc if done as
-/// a wider legal integer load.
-///
-/// MemLocBase, MemLocOffset are lazily computed here the first time the
-/// base/offs of memloc is needed.
-static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
- const Value *&MemLocBase,
- int64_t &MemLocOffs,
- const LoadInst *LI) {
- const DataLayout &DL = LI->getModule()->getDataLayout();
-
- // If we haven't already computed the base/offset of MemLoc, do so now.
- if (!MemLocBase)
- MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
-
- unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
- MemLocBase, MemLocOffs, MemLoc.Size, LI);
- return Size != 0;
-}
-
unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
const LoadInst *LI) {
@@ -292,7 +286,7 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
- while (1) {
+ while (true) {
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
if (NewLoadByteSize > LoadAlign ||
@@ -327,7 +321,7 @@ static bool isVolatile(Instruction *Inst) {
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -338,49 +332,69 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
return invariantGroupDependency;
}
}
- return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+ Limit);
}
MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
- BasicBlock *BB) {
+ BasicBlock *BB) {
+
+ auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
+ if (!InvariantGroupMD)
+ return MemDepResult::getUnknown();
+
Value *LoadOperand = LI->getPointerOperand();
  // It is not safe to walk the use list of a global value, because function
// passes aren't allowed to look outside their functions.
if (isa<GlobalValue>(LoadOperand))
return MemDepResult::getUnknown();
- auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
- if (!InvariantGroupMD)
- return MemDepResult::getUnknown();
-
- MemDepResult Result = MemDepResult::getUnknown();
- llvm::SmallSet<Value *, 14> Seen;
// Queue to process all pointers that are equivalent to load operand.
- llvm::SmallVector<Value *, 8> LoadOperandsQueue;
- LoadOperandsQueue.push_back(LoadOperand);
+ SmallVector<const Value *, 8> LoadOperandsQueue;
+ SmallSet<const Value *, 14> SeenValues;
+ auto TryInsertToQueue = [&](Value *V) {
+ if (SeenValues.insert(V).second)
+ LoadOperandsQueue.push_back(V);
+ };
+
+ TryInsertToQueue(LoadOperand);
while (!LoadOperandsQueue.empty()) {
- Value *Ptr = LoadOperandsQueue.pop_back_val();
+ const Value *Ptr = LoadOperandsQueue.pop_back_val();
+ assert(Ptr);
if (isa<GlobalValue>(Ptr))
continue;
- if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
- if (Seen.insert(BCI->getOperand(0)).second) {
- LoadOperandsQueue.push_back(BCI->getOperand(0));
- }
- }
-
- for (Use &Us : Ptr->uses()) {
+ // Value comes from bitcast: Ptr = bitcast x. Insert x.
+ if (auto *BCI = dyn_cast<BitCastInst>(Ptr))
+ TryInsertToQueue(BCI->getOperand(0));
+    // A GEP with all-zero indices is equivalent to a bitcast.
+    // FIXME: we are not sure whether some bitcasts should be canonicalized to
+    // gep 0 or gep 0 to bitcast because of SROA, so there are 2 forms. Once
+    // typeless pointers land upstream, both cases will go away (and this BFS
+    // also won't be needed).
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ if (GEP->hasAllZeroIndices())
+ TryInsertToQueue(GEP->getOperand(0));
+
+ for (const Use &Us : Ptr->uses()) {
auto *U = dyn_cast<Instruction>(Us.getUser());
if (!U || U == LI || !DT.dominates(U, LI))
continue;
- if (auto *BCI = dyn_cast<BitCastInst>(U)) {
- if (Seen.insert(BCI).second) {
- LoadOperandsQueue.push_back(BCI);
- }
+      // A bitcast or an all-zero GEP is using Ptr. Add it to the queue to
+      // check its users. U = bitcast Ptr
+ if (isa<BitCastInst>(U)) {
+ TryInsertToQueue(U);
continue;
}
+ // U = getelementptr Ptr, 0, 0...
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
+ if (GEP->hasAllZeroIndices()) {
+ TryInsertToQueue(U);
+ continue;
+ }
+
      // If we hit a load/store with the same invariant.group metadata (and the
      // same pointer operand), we can assume that the value pointed to by the
      // pointer operand didn't change.
@@ -389,18 +403,20 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
return MemDepResult::getDef(U);
}
}
- return Result;
+ return MemDepResult::getUnknown();
}
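The pointer-equivalence rule the BFS above relies on can be restated as a small predicate; this is a hedged paraphrase (the helper name is hypothetical, not code from the patch):

    // A value is "the same pointer" if it merely re-types another one:
    // either a bitcast, or a getelementptr whose indices are all zero.
    static bool isPointerCastLike(const Value *V) {
      if (isa<BitCastInst>(V))
        return true;
      if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
        return GEP->hasAllZeroIndices();
      return false;
    }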
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
-
- const Value *MemLocBase = nullptr;
- int64_t MemLocOffset = 0;
- unsigned Limit = BlockScanLimit;
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
bool isInvariantLoad = false;
+ if (!Limit) {
+ unsigned DefaultLimit = BlockScanLimit;
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+ &DefaultLimit);
+ }
+
// We must be careful with atomic accesses, as they may allow another thread
// to touch this location, clobbering it. We are conservative: if the
// QueryInst is not a simple (non-atomic) memory access, we automatically
@@ -474,8 +490,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
- --Limit;
- if (!Limit)
+ --*Limit;
+ if (!*Limit)
return MemDepResult::getUnknown();
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -530,21 +546,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
AliasResult R = AA.alias(LoadLoc, MemLoc);
if (isLoad) {
- if (R == NoAlias) {
- // If this is an over-aligned integer load (for example,
- // "load i8* %P, align 4") see if it would obviously overlap with the
- // queried location if widened to a larger load (e.g. if the queried
- // location is 1 byte at P+1). If so, return it as a load/load
- // clobber result, allowing the client to decide to widen the load if
- // it wants to.
- if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
- if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
- isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
- MemLocOffset, LI))
- return MemDepResult::getClobber(Inst);
- }
+ if (R == NoAlias)
continue;
- }
// Must aliased loads are defs of each other.
if (R == MustAlias)
@@ -697,7 +700,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
- // No dependence found. If this is the entry block of the function, it is
+ // No dependence found. If this is the entry block of the function, it is
// unknown, otherwise it is non-local.
if (QueryParent != &QueryParent->getParent()->getEntryBlock())
LocalCache = MemDepResult::getNonLocal();
@@ -709,7 +712,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
if (MemLoc.Ptr) {
// If we can do a pointer scan, make it happen.
bool isLoad = !(MR & MRI_Mod);
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
LocalCache = getPointerDependencyFrom(
@@ -1010,7 +1013,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
Cache.insert(Entry, Val);
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
}
case 1:
// One new entry, Just insert the new value at the appropriate position.
@@ -1659,10 +1662,10 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
#endif
}
-char MemoryDependenceAnalysis::PassID;
+AnalysisKey MemoryDependenceAnalysis::Key;
MemoryDependenceResults
-MemoryDependenceAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
@@ -1684,6 +1687,7 @@ INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
}
+
MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() {}
void MemoryDependenceWrapperPass::releaseMemory() {
@@ -1698,6 +1702,28 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
+bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check whether our analysis is preserved.
+ auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ // If not, give up now.
+ return true;
+
+ // Check whether the analyses we depend on became invalid for any reason.
+ if (Inv.invalidate<AAManager>(F, PA) ||
+ Inv.invalidate<AssumptionAnalysis>(F, PA) ||
+ Inv.invalidate<DominatorTreeAnalysis>(F, PA))
+ return true;
+
+ // Otherwise this analysis result remains valid.
+ return false;
+}
+
+unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
+ return BlockScanLimit;
+}
+
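A hedged sketch of the new out-parameter form (MD, Loc, ScanIt, BB and QueryInst are assumed to be in scope): the budget is decremented in place, so a chain of queries shares a single scan limit.

    unsigned Limit = MD.getDefaultBlockScanLimit();
    MemDepResult Dep = MD.getPointerDependencyFrom(
        Loc, /*isLoad=*/true, ScanIt, BB, QueryInst, &Limit);
    // On return, Limit holds the budget remaining for any follow-up scan.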
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 36c47141a45f..f675830aa67d 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -74,7 +74,8 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
// filenames), so just print a few useful things.
for (DICompileUnit *CU : Finder.compile_units()) {
O << "Compile unit: ";
- if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage()))
+ auto Lang = dwarf::LanguageString(CU->getSourceLanguage());
+ if (!Lang.empty())
O << Lang;
else
O << "unknown-language(" << CU->getSourceLanguage() << ")";
@@ -90,7 +91,8 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
O << '\n';
}
- for (const DIGlobalVariable *GV : Finder.global_variables()) {
+ for (auto GVU : Finder.global_variables()) {
+ const auto *GV = GVU->getVariable();
O << "Global variable: " << GV->getName();
printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
if (!GV->getLinkageName().empty())
@@ -105,14 +107,15 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
printFile(O, T->getFilename(), T->getDirectory(), T->getLine());
if (auto *BT = dyn_cast<DIBasicType>(T)) {
O << " ";
- if (const char *Encoding =
- dwarf::AttributeEncodingString(BT->getEncoding()))
+ auto Encoding = dwarf::AttributeEncodingString(BT->getEncoding());
+ if (!Encoding.empty())
O << Encoding;
else
O << "unknown-encoding(" << BT->getEncoding() << ')';
} else {
O << ' ';
- if (const char *Tag = dwarf::TagString(T->getTag()))
+ auto Tag = dwarf::TagString(T->getTag());
+ if (!Tag.empty())
O << Tag;
else
O << "unknown-tag(" << T->getTag() << ")";
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index c9ac2bdb7942..1d2ffc1abe1f 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -13,16 +13,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Object/IRObjectFile.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -31,7 +37,7 @@ using namespace llvm;
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
-static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges,
+static void findRefEdges(const User *CurUser, SetVector<ValueInfo> &RefEdges,
SmallPtrSet<const User *, 8> &Visited) {
SmallVector<const User *, 32> Worklist;
Worklist.push_back(CurUser);
@@ -50,12 +56,12 @@ static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges,
continue;
if (isa<BlockAddress>(Operand))
continue;
- if (isa<GlobalValue>(Operand)) {
+ if (auto *GV = dyn_cast<GlobalValue>(Operand)) {
// We have a reference to a global value. This should be added to
// the reference set unless it is a callee. Callees are handled
// specially by WriteFunction and are added to a separate list.
if (!(CS && CS.isCallee(&OI)))
- RefEdges.insert(Operand);
+ RefEdges.insert(GV);
continue;
}
Worklist.push_back(Operand);
@@ -63,98 +69,178 @@ static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges,
}
}
-void ModuleSummaryIndexBuilder::computeFunctionSummary(
- const Function &F, BlockFrequencyInfo *BFI) {
- // Summary not currently supported for anonymous functions, they must
- // be renamed.
- if (!F.hasName())
- return;
+static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
+ ProfileSummaryInfo *PSI) {
+ if (!PSI)
+ return CalleeInfo::HotnessType::Unknown;
+ if (PSI->isHotCount(ProfileCount))
+ return CalleeInfo::HotnessType::Hot;
+ if (PSI->isColdCount(ProfileCount))
+ return CalleeInfo::HotnessType::Cold;
+ return CalleeInfo::HotnessType::None;
+}
+
+static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
+ const Function &F, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI,
+ bool HasLocalsInUsed) {
+  // Summaries are not currently supported for anonymous functions; they
+  // should have been named by this point.
+ assert(F.hasName());
unsigned NumInsts = 0;
// Map from callee ValueId to profile count. Used to accumulate profile
// counts for all static calls to a given callee.
- DenseMap<const Value *, CalleeInfo> CallGraphEdges;
- DenseMap<GlobalValue::GUID, CalleeInfo> IndirectCallEdges;
- DenseSet<const Value *> RefEdges;
+ MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
+ SetVector<ValueInfo> RefEdges;
+ SetVector<GlobalValue::GUID> TypeTests;
ICallPromotionAnalysis ICallAnalysis;
+ bool HasInlineAsmMaybeReferencingInternal = false;
SmallPtrSet<const User *, 8> Visited;
for (const BasicBlock &BB : F)
for (const Instruction &I : BB) {
- if (!isa<DbgInfoIntrinsic>(I))
- ++NumInsts;
-
- if (auto CS = ImmutableCallSite(&I)) {
- auto *CalledFunction = CS.getCalledFunction();
- // Check if this is a direct call to a known function.
- if (CalledFunction) {
- if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) {
- auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
- auto *CalleeId =
- M->getValueSymbolTable().lookup(CalledFunction->getName());
- CallGraphEdges[CalleeId] +=
- (ScaledCount ? ScaledCount.getValue() : 0);
- }
- } else {
- // Otherwise, check for an indirect call (call to a non-const value
- // that isn't an inline assembly call).
- const CallInst *CI = dyn_cast<CallInst>(&I);
- if (CS.getCalledValue() && !isa<Constant>(CS.getCalledValue()) &&
- !(CI && CI->isInlineAsm())) {
- uint32_t NumVals, NumCandidates;
- uint64_t TotalCount;
- auto CandidateProfileData =
- ICallAnalysis.getPromotionCandidatesForInstruction(
- &I, NumVals, TotalCount, NumCandidates);
- for (auto &Candidate : CandidateProfileData)
- IndirectCallEdges[Candidate.Value] += Candidate.Count;
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ ++NumInsts;
+ findRefEdges(&I, RefEdges, Visited);
+ auto CS = ImmutableCallSite(&I);
+ if (!CS)
+ continue;
+
+ const auto *CI = dyn_cast<CallInst>(&I);
+ // Since we don't know exactly which local values are referenced in inline
+ // assembly, conservatively mark the function as possibly referencing
+ // a local value from inline assembly to ensure we don't export a
+ // reference (which would require renaming and promotion of the
+ // referenced value).
+ if (HasLocalsInUsed && CI && CI->isInlineAsm())
+ HasInlineAsmMaybeReferencingInternal = true;
+
+ auto *CalledValue = CS.getCalledValue();
+ auto *CalledFunction = CS.getCalledFunction();
+ // Check if this is an alias to a function. If so, get the
+ // called aliasee for the checks below.
+ if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
+        assert(!CalledFunction &&
+               "Expected null called function in callsite for alias");
+ CalledFunction = dyn_cast<Function>(GA->getBaseObject());
+ }
+ // Check if this is a direct call to a known function or a known
+ // intrinsic, or an indirect call with profile data.
+ if (CalledFunction) {
+ if (CalledFunction->isIntrinsic()) {
+ if (CalledFunction->getIntrinsicID() != Intrinsic::type_test)
+ continue;
+        // Produce a summary from type.test intrinsics. We only summarize
+        // type.test intrinsics that have uses other than an llvm.assume
+        // intrinsic. Type tests that are only assumed are relevant only to
+        // the devirtualization pass, not the type test lowering pass.
+ bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
+ auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser());
+ if (!AssumeCI)
+ return true;
+ Function *F = AssumeCI->getCalledFunction();
+ return !F || F->getIntrinsicID() != Intrinsic::assume;
+ });
+ if (HasNonAssumeUses) {
+ auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()))
+ TypeTests.insert(GlobalValue::getGUID(TypeId->getString()));
}
}
+      // We should have named any anonymous globals by now.
+ assert(CalledFunction->hasName());
+ auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
+ auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
+ : CalleeInfo::HotnessType::Unknown;
+
+ // Use the original CalledValue, in case it was an alias. We want
+ // to record the call edge to the alias in that case. Eventually
+ // an alias summary will be created to associate the alias and
+ // aliasee.
+ CallGraphEdges[cast<GlobalValue>(CalledValue)].updateHotness(Hotness);
+ } else {
+ // Skip inline assembly calls.
+ if (CI && CI->isInlineAsm())
+ continue;
+ // Skip direct calls.
+ if (!CS.getCalledValue() || isa<Constant>(CS.getCalledValue()))
+ continue;
+
+ uint32_t NumVals, NumCandidates;
+ uint64_t TotalCount;
+ auto CandidateProfileData =
+ ICallAnalysis.getPromotionCandidatesForInstruction(
+ &I, NumVals, TotalCount, NumCandidates);
+ for (auto &Candidate : CandidateProfileData)
+ CallGraphEdges[Candidate.Value].updateHotness(
+ getHotness(Candidate.Count, PSI));
}
- findRefEdges(&I, RefEdges, Visited);
}
GlobalValueSummary::GVFlags Flags(F);
- std::unique_ptr<FunctionSummary> FuncSummary =
- llvm::make_unique<FunctionSummary>(Flags, NumInsts);
- FuncSummary->addCallGraphEdges(CallGraphEdges);
- FuncSummary->addCallGraphEdges(IndirectCallEdges);
- FuncSummary->addRefEdges(RefEdges);
- Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary));
+ auto FuncSummary = llvm::make_unique<FunctionSummary>(
+ Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
+ TypeTests.takeVector());
+ if (HasInlineAsmMaybeReferencingInternal)
+ FuncSummary->setHasInlineAsmMaybeReferencingInternal();
+ Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary));
}
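One note on the hotness bookkeeping above: updateHotness is invoked once per call site, so multiple edges to the same callee merge into a single entry. A hedged illustration, assuming updateHotness keeps the hottest classification seen (which the accumulation here relies on; Callee is a placeholder ValueInfo):

    MapVector<ValueInfo, CalleeInfo> Edges;
    Edges[Callee].updateHotness(CalleeInfo::HotnessType::Cold);
    Edges[Callee].updateHotness(CalleeInfo::HotnessType::Hot);
    // A single edge to Callee remains, now classified as Hot.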
-void ModuleSummaryIndexBuilder::computeVariableSummary(
- const GlobalVariable &V) {
- DenseSet<const Value *> RefEdges;
+static void computeVariableSummary(ModuleSummaryIndex &Index,
+ const GlobalVariable &V) {
+ SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
findRefEdges(&V, RefEdges, Visited);
GlobalValueSummary::GVFlags Flags(V);
- std::unique_ptr<GlobalVarSummary> GVarSummary =
- llvm::make_unique<GlobalVarSummary>(Flags);
- GVarSummary->addRefEdges(RefEdges);
- Index->addGlobalValueSummary(V.getName(), std::move(GVarSummary));
+ auto GVarSummary =
+ llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
+ Index.addGlobalValueSummary(V.getName(), std::move(GVarSummary));
+}
+
+static void computeAliasSummary(ModuleSummaryIndex &Index,
+ const GlobalAlias &A) {
+ GlobalValueSummary::GVFlags Flags(A);
+ auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
+ auto *Aliasee = A.getBaseObject();
+ auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
+ assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
+ AS->setAliasee(AliaseeSummary);
+ Index.addGlobalValueSummary(A.getName(), std::move(AS));
}
-ModuleSummaryIndexBuilder::ModuleSummaryIndexBuilder(
- const Module *M,
- std::function<BlockFrequencyInfo *(const Function &F)> Ftor)
- : Index(llvm::make_unique<ModuleSummaryIndex>()), M(M) {
- // Check if the module can be promoted, otherwise just disable importing from
- // it by not emitting any summary.
- // FIXME: we could still import *into* it most of the time.
- if (!moduleCanBeRenamedForThinLTO(*M))
- return;
+ModuleSummaryIndex llvm::buildModuleSummaryIndex(
+ const Module &M,
+ std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
+ ProfileSummaryInfo *PSI) {
+ ModuleSummaryIndex Index;
+
+ // Identify the local values in the llvm.used and llvm.compiler.used sets,
+ // which should not be exported as they would then require renaming and
+ // promotion, but we may have opaque uses e.g. in inline asm. We collect them
+ // here because we use this information to mark functions containing inline
+ // assembly calls as not importable.
+ SmallPtrSet<GlobalValue *, 8> LocalsUsed;
+ SmallPtrSet<GlobalValue *, 8> Used;
+ // First collect those in the llvm.used set.
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
+ // Next collect those in the llvm.compiler.used set.
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
+ for (auto *V : Used) {
+ if (V->hasLocalLinkage())
+ LocalsUsed.insert(V);
+ }
// Compute summaries for all functions defined in module, and save in the
// index.
- for (auto &F : *M) {
+ for (auto &F : M) {
if (F.isDeclaration())
continue;
BlockFrequencyInfo *BFI = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFIPtr;
- if (Ftor)
- BFI = Ftor(F);
+ if (GetBFICallback)
+ BFI = GetBFICallback(F);
else if (F.getEntryCount().hasValue()) {
LoopInfo LI{DominatorTree(const_cast<Function &>(F))};
BranchProbabilityInfo BPI{F, LI};
@@ -162,16 +248,89 @@ ModuleSummaryIndexBuilder::ModuleSummaryIndexBuilder(
BFI = BFIPtr.get();
}
- computeFunctionSummary(F, BFI);
+ computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty());
}
// Compute summaries for all variables defined in module, and save in the
// index.
- for (const GlobalVariable &G : M->globals()) {
+ for (const GlobalVariable &G : M.globals()) {
if (G.isDeclaration())
continue;
- computeVariableSummary(G);
+ computeVariableSummary(Index, G);
+ }
+
+ // Compute summaries for all aliases defined in module, and save in the
+ // index.
+ for (const GlobalAlias &A : M.aliases())
+ computeAliasSummary(Index, A);
+
+ for (auto *V : LocalsUsed) {
+ auto *Summary = Index.getGlobalValueSummary(*V);
+ assert(Summary && "Missing summary for global value");
+ Summary->setNoRename();
+ }
+
+ if (!M.getModuleInlineAsm().empty()) {
+ // Collect the local values defined by module level asm, and set up
+ // summaries for these symbols so that they can be marked as NoRename,
+ // to prevent export of any use of them in regular IR that would require
+ // renaming within the module level asm. Note we don't need to create a
+ // summary for weak or global defs, as they don't need to be flagged as
+ // NoRename, and defs in module level asm can't be imported anyway.
+ // Also, any values used but not defined within module level asm should
+ // be listed on the llvm.used or llvm.compiler.used global and marked as
+ // referenced from there.
+ ModuleSymbolTable::CollectAsmSymbols(
+ Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
+ [&M, &Index](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ // Symbols not marked as Weak or Global are local definitions.
+ if (Flags & (object::BasicSymbolRef::SF_Weak |
+ object::BasicSymbolRef::SF_Global))
+ return;
+ GlobalValue *GV = M.getNamedValue(Name);
+ if (!GV)
+ return;
+          assert(GV->isDeclaration() &&
+                 "Def in module asm already has definition");
+ GlobalValueSummary::GVFlags GVFlags(
+ GlobalValue::InternalLinkage,
+ /* NoRename */ true,
+ /* HasInlineAsmMaybeReferencingInternal */ false,
+ /* IsNotViableToInline */ true);
+ // Create the appropriate summary type.
+ if (isa<Function>(GV)) {
+ std::unique_ptr<FunctionSummary> Summary =
+ llvm::make_unique<FunctionSummary>(
+ GVFlags, 0, ArrayRef<ValueInfo>{},
+ ArrayRef<FunctionSummary::EdgeTy>{},
+ ArrayRef<GlobalValue::GUID>{});
+ Summary->setNoRename();
+ Index.addGlobalValueSummary(Name, std::move(Summary));
+ } else {
+ std::unique_ptr<GlobalVarSummary> Summary =
+ llvm::make_unique<GlobalVarSummary>(GVFlags,
+ ArrayRef<ValueInfo>{});
+ Summary->setNoRename();
+ Index.addGlobalValueSummary(Name, std::move(Summary));
+ }
+ });
}
+
+ return Index;
+}
+
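A hedged usage sketch of the new free function, standalone and without per-function BFI (the PSI construction mirrors what the wrapper pass does in doInitialization):

    ProfileSummaryInfo PSI(M);
    ModuleSummaryIndex Index =
        llvm::buildModuleSummaryIndex(M, /*GetBFICallback=*/nullptr, &PSI);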
+AnalysisKey ModuleSummaryIndexAnalysis::Key;
+
+ModuleSummaryIndex
+ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
+ ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ return buildModuleSummaryIndex(
+ M,
+ [&FAM](const Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(
+ *const_cast<Function *>(&F));
+ },
+ &PSI);
}
char ModuleSummaryIndexWrapperPass::ID = 0;
@@ -191,59 +350,25 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
}
bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
- IndexBuilder = llvm::make_unique<ModuleSummaryIndexBuilder>(
- &M, [this](const Function &F) {
+ auto &PSI = *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ Index = buildModuleSummaryIndex(
+ M,
+ [this](const Function &F) {
return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
*const_cast<Function *>(&F))
.getBFI());
- });
+ },
+ &PSI);
return false;
}
bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) {
- IndexBuilder.reset();
+ Index.reset();
return false;
}
void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
-}
-
-bool llvm::moduleCanBeRenamedForThinLTO(const Module &M) {
- // We cannot currently promote or rename anything used in inline assembly,
- // which are not visible to the compiler. Detect a possible case by looking
- // for a llvm.used local value, in conjunction with an inline assembly call
- // in the module. Prevent importing of any modules containing these uses by
- // suppressing generation of the index. This also prevents importing
- // into this module, which is also necessary to avoid needing to rename
- // in case of a name clash between a local in this module and an imported
- // global.
- // FIXME: If we find we need a finer-grained approach of preventing promotion
- // and renaming of just the functions using inline assembly we will need to:
- // - Add flag in the function summaries to identify those with inline asm.
- // - Prevent importing of any functions with flag set.
- // - Prevent importing of any global function with the same name as a
- // function in current module that has the flag set.
- // - For any llvm.used value that is exported and promoted, add a private
- // alias to the original name in the current module (even if we don't
- // export the function using those values in inline asm, another function
- // with a reference could be exported).
- SmallPtrSet<GlobalValue *, 8> Used;
- collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
- bool LocalIsUsed =
- llvm::any_of(Used, [](GlobalValue *V) { return V->hasLocalLinkage(); });
- if (!LocalIsUsed)
- return true;
-
- // Walk all the instructions in the module and find if one is inline ASM
- auto HasInlineAsm = llvm::any_of(M, [](const Function &F) {
- return llvm::any_of(instructions(F), [](const Instruction &I) {
- const CallInst *CallI = dyn_cast<CallInst>(&I);
- if (!CallI)
- return false;
- return CallI->isInlineAsm();
- });
- });
- return !HasInlineAsm;
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
diff --git a/lib/Analysis/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 9bb1048ea8ba..ed03406ca8c6 100644
--- a/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -131,7 +131,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
return AAResultBase::getModRefInfo(CS, Loc);
}
-ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> &AM) {
+ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
return ObjCARCAAResult(F.getParent()->getDataLayout());
}
diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp
index 3dc1463b8d8b..1e75c0824d03 100644
--- a/lib/Analysis/ObjCARCInstKind.cpp
+++ b/lib/Analysis/ObjCARCInstKind.cpp
@@ -96,43 +96,47 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
// One argument.
const Argument *A0 = &*AI++;
- if (AI == AE)
+ if (AI == AE) {
// Argument is a pointer.
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
+ PointerType *PTy = dyn_cast<PointerType>(A0->getType());
+ if (!PTy)
+ return ARCInstKind::CallOrUser;
+
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_retain", ARCInstKind::Retain)
+ .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
+ .Case("objc_unsafeClaimAutoreleasedReturnValue", ARCInstKind::ClaimRV)
+ .Case("objc_retainBlock", ARCInstKind::RetainBlock)
+ .Case("objc_release", ARCInstKind::Release)
+ .Case("objc_autorelease", ARCInstKind::Autorelease)
+ .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
+ .Case("objc_retainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
+ .Case("objc_retain_autorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",
+ ARCInstKind::FusedRetainAutoreleaseRV)
+ .Case("objc_sync_enter", ARCInstKind::User)
+ .Case("objc_sync_exit", ARCInstKind::User)
+ .Default(ARCInstKind::CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_retain", ARCInstKind::Retain)
- .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
- .Case("objc_unsafeClaimAutoreleasedReturnValue",
- ARCInstKind::ClaimRV)
- .Case("objc_retainBlock", ARCInstKind::RetainBlock)
- .Case("objc_release", ARCInstKind::Release)
- .Case("objc_autorelease", ARCInstKind::Autorelease)
- .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
- .Case("objc_retainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
- .Case("objc_retain_autorelease",
- ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",
- ARCInstKind::FusedRetainAutoreleaseRV)
- .Case("objc_sync_enter", ARCInstKind::User)
- .Case("objc_sync_exit", ARCInstKind::User)
+ .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
+ .Case("objc_loadWeak", ARCInstKind::LoadWeak)
+ .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
.Default(ARCInstKind::CallOrUser);
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
- .Case("objc_loadWeak", ARCInstKind::LoadWeak)
- .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
- .Default(ARCInstKind::CallOrUser);
- }
+ // Anything else with one argument.
+ return ARCInstKind::CallOrUser;
+ }
// Two arguments, first is i8**.
const Argument *A1 = &*AI++;
@@ -180,6 +184,7 @@ static bool isInertIntrinsic(unsigned ID) {
// TODO: Make this into a covered switch.
switch (ID) {
case Intrinsic::returnaddress:
+ case Intrinsic::addressofreturnaddress:
case Intrinsic::frameaddress:
case Intrinsic::stacksave:
case Intrinsic::stackrestore:
diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationDiagnosticInfo.cpp
index e979ba2531e4..fa8b07d61b01 100644
--- a/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -13,30 +13,204 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;
-Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(Value *V) {
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+ : F(F), BFI(nullptr) {
+ if (!F->getContext().getDiagnosticHotnessRequested())
+ return;
+
+ // First create a dominator tree.
+ DominatorTree DT;
+ DT.recalculate(*F);
+
+ // Generate LoopInfo from it.
+ LoopInfo LI;
+ LI.analyze(DT);
+
+ // Then compute BranchProbabilityInfo.
+ BranchProbabilityInfo BPI;
+ BPI.calculate(*F, LI);
+
+ // Finally compute BFI.
+ OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+ BFI = OwnedBFI.get();
+}
+
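A hedged usage sketch of the emitter defined by the constructor above (pass name and message are placeholders): BFI, and with it hotness, is only computed when the context requested diagnostic hotness.

    OptimizationRemarkEmitter ORE(&F);
    ORE.emitOptimizationRemark("my-pass", I.getDebugLoc(), I.getParent(),
                               "transformed");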
+Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
if (!BFI)
return None;
return BFI->getBlockProfileCount(cast<BasicBlock>(V));
}
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
+ static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
+ assert(io.outputting() && "input not yet implemented");
+
+ if (io.mapTag("!Passed", OptDiag->getKind() == DK_OptimizationRemark))
+ ;
+ else if (io.mapTag("!Missed",
+ OptDiag->getKind() == DK_OptimizationRemarkMissed))
+ ;
+ else if (io.mapTag("!Analysis",
+ OptDiag->getKind() == DK_OptimizationRemarkAnalysis))
+ ;
+ else if (io.mapTag("!AnalysisFPCommute",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisFPCommute))
+ ;
+ else if (io.mapTag("!AnalysisAliasing",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisAliasing))
+ ;
+ else
+ llvm_unreachable("todo");
+
+ // These are read-only for now.
+ DebugLoc DL = OptDiag->getDebugLoc();
+ StringRef FN = GlobalValue::getRealLinkageName(
+ OptDiag->getFunction().getName());
+
+ StringRef PassName(OptDiag->PassName);
+ io.mapRequired("Pass", PassName);
+ io.mapRequired("Name", OptDiag->RemarkName);
+ if (!io.outputting() || DL)
+ io.mapOptional("DebugLoc", DL);
+ io.mapRequired("Function", FN);
+ io.mapOptional("Hotness", OptDiag->Hotness);
+ io.mapOptional("Args", OptDiag->Args);
+ }
+};
+
+template <> struct MappingTraits<DebugLoc> {
+ static void mapping(IO &io, DebugLoc &DL) {
+ assert(io.outputting() && "input not yet implemented");
+
+ auto *Scope = cast<DIScope>(DL.getScope());
+ StringRef File = Scope->getFilename();
+ unsigned Line = DL.getLine();
+ unsigned Col = DL.getCol();
+
+ io.mapRequired("File", File);
+ io.mapRequired("Line", Line);
+ io.mapRequired("Column", Col);
+ }
+
+ static const bool flow = true;
+};
+
+// Implement this as a mapping for now to get proper quotation for the value.
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
+ static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
+ assert(io.outputting() && "input not yet implemented");
+ io.mapRequired(A.Key.data(), A.Val);
+ if (A.DLoc)
+ io.mapOptional("DebugLoc", A.DLoc);
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument)
+
+void OptimizationRemarkEmitter::computeHotness(
+ DiagnosticInfoOptimizationBase &OptDiag) {
+ Value *V = OptDiag.getCodeRegion();
+ if (V)
+ OptDiag.setHotness(computeHotness(V));
+}
+
+void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) {
+ computeHotness(OptDiag);
+
+ yaml::Output *Out = F->getContext().getDiagnosticsOutputFile();
+ if (Out) {
+ auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiag);
+ *Out << P;
+ }
+ // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
+ // from here to clang.
+ if (!OptDiag.isVerbose() || shouldEmitVerbose())
+ F->getContext().diagnose(OptDiag);
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
+ const DebugLoc &DLoc,
+ const Value *V,
+ const Twine &Msg) {
+ LLVMContext &Ctx = F->getContext();
+ Ctx.diagnose(OptimizationRemark(PassName, *F, DLoc, Msg, computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
+ Loop *L,
+ const Twine &Msg) {
+ emitOptimizationRemark(PassName, L->getStartLoc(), L->getHeader(), Msg);
+}
+
void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
- const char *PassName, const DebugLoc &DLoc, Value *V, const Twine &Msg) {
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg, bool IsVerbose) {
LLVMContext &Ctx = F->getContext();
- Ctx.diagnose(DiagnosticInfoOptimizationRemarkMissed(PassName, *F, DLoc, Msg,
- computeHotness(V)));
+ if (!IsVerbose || shouldEmitVerbose())
+ Ctx.diagnose(
+ OptimizationRemarkMissed(PassName, *F, DLoc, Msg, computeHotness(V)));
}
void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
+ const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
+ emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg,
+ IsVerbose);
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg, bool IsVerbose) {
+ LLVMContext &Ctx = F->getContext();
+ if (!IsVerbose || shouldEmitVerbose())
+ Ctx.diagnose(
+ OptimizationRemarkAnalysis(PassName, *F, DLoc, Msg, computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
+ const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
+ emitOptimizationRemarkAnalysis(PassName, L->getStartLoc(), L->getHeader(),
+ Msg, IsVerbose);
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisFPCommute(
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg) {
+ LLVMContext &Ctx = F->getContext();
+ Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, *F, DLoc, Msg,
+ computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg) {
+ LLVMContext &Ctx = F->getContext();
+ Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, *F, DLoc, Msg,
+ computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
const char *PassName, Loop *L, const Twine &Msg) {
- emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg);
+ emitOptimizationRemarkAnalysisAliasing(PassName, L->getStartLoc(),
+ L->getHeader(), Msg);
}
OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
@@ -63,10 +237,11 @@ void OptimizationRemarkEmitterWrapperPass::getAnalysisUsage(
AU.setPreservesAll();
}
-char OptimizationRemarkEmitterAnalysis::PassID;
+AnalysisKey OptimizationRemarkEmitterAnalysis::Key;
OptimizationRemarkEmitter
-OptimizationRemarkEmitterAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+OptimizationRemarkEmitterAnalysis::run(Function &F,
+ FunctionAnalysisManager &AM) {
BlockFrequencyInfo *BFI;
if (F.getContext().getDiagnosticHotnessRequested())
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index b4aad74d50dc..84ecd4ab9809 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -62,8 +62,7 @@ static bool VerifySubExpr(Value *Expr,
// If it's an instruction, it is either in Tmp or its operands recursively
// are.
- SmallVectorImpl<Instruction*>::iterator Entry =
- std::find(InstInputs.begin(), InstInputs.end(), I);
+ SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I);
if (Entry != InstInputs.end()) {
InstInputs.erase(Entry);
return true;
@@ -126,8 +125,7 @@ static void RemoveInstInputs(Value *V,
if (!I) return;
// If the instruction is in the InstInputs list, remove it.
- SmallVectorImpl<Instruction*>::iterator Entry =
- std::find(InstInputs.begin(), InstInputs.end(), I);
+ SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I);
if (Entry != InstInputs.end()) {
InstInputs.erase(Entry);
return;
@@ -150,8 +148,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
if (!Inst) return V;
// Determine whether 'Inst' is an input to our PHI translatable expression.
- bool isInput =
- std::find(InstInputs.begin(), InstInputs.end(), Inst) != InstInputs.end();
+ bool isInput = is_contained(InstInputs, Inst);
// Handle inputs instructions if needed.
if (isInput) {
@@ -165,7 +162,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// translated, we need to incorporate the value into the expression or fail.
// In either case, the instruction itself isn't an input any longer.
- InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+ InstInputs.erase(find(InstInputs, Inst));
// If this is a PHI, go ahead and translate it.
if (PHINode *PN = dyn_cast<PHINode>(Inst))
@@ -272,8 +269,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
isNSW = isNUW = false;
// If the old 'LHS' was an input, add the new 'LHS' as an input.
- if (std::find(InstInputs.begin(), InstInputs.end(), BOp) !=
- InstInputs.end()) {
+ if (is_contained(InstInputs, BOp)) {
RemoveInstInputs(BOp, InstInputs);
AddAsInput(LHS);
}
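The hunks above swap std::find for the range helpers from llvm/ADT/STLExtras.h; a hedged equivalence sketch:

    auto It = llvm::find(InstInputs, I);         // std::find(begin, end, I)
    bool In = llvm::is_contained(InstInputs, I); // find(...) != end()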
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 73550805d5ba..cb9438a2f928 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -44,7 +44,7 @@ FunctionPass* llvm::createPostDomTree() {
return new PostDominatorTreeWrapperPass();
}
-char PostDominatorTreeAnalysis::PassID;
+AnalysisKey PostDominatorTreeAnalysis::Key;
PostDominatorTree PostDominatorTreeAnalysis::run(Function &F,
FunctionAnalysisManager &) {
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 9cf99af49581..16d3614c14c6 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -12,7 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
@@ -63,10 +65,10 @@ void ProfileSummaryInfo::computeSummary() {
Summary.reset(ProfileSummary::getFromMD(SummaryMD));
}
-// Returns true if the function is a hot function. If it returns false, it
-// either means it is not hot or it is unknown whether F is hot or not (for
-// example, no profile data is available).
-bool ProfileSummaryInfo::isHotFunction(const Function *F) {
+/// Returns true if the function's entry is hot. If it returns false, it
+/// either means it is not hot or it is unknown whether it is hot or not (for
+/// example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
computeSummary();
if (!F || !Summary)
return false;
@@ -74,15 +76,13 @@ bool ProfileSummaryInfo::isHotFunction(const Function *F) {
// FIXME: The heuristic used below for determining hotness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return (FunctionCount &&
- FunctionCount.getValue() >=
- (uint64_t)(0.3 * (double)Summary->getMaxFunctionCount()));
+ return FunctionCount && isHotCount(FunctionCount.getValue());
}
-// Returns true if the function is a cold function. If it returns false, it
-// either means it is not cold or it is unknown whether F is cold or not (for
-// example, no profile data is available).
-bool ProfileSummaryInfo::isColdFunction(const Function *F) {
+/// Returns true if the function's entry is cold. If it returns false, it
+/// either means it is not cold or it is unknown whether it is cold or not (for
+/// example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) {
computeSummary();
if (!F)
return false;
@@ -95,12 +95,10 @@ bool ProfileSummaryInfo::isColdFunction(const Function *F) {
// FIXME: The heuristic used below for determining coldness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return (FunctionCount &&
- FunctionCount.getValue() <=
- (uint64_t)(0.01 * (double)Summary->getMaxFunctionCount()));
+ return FunctionCount && isColdCount(FunctionCount.getValue());
}
-// Compute the hot and cold thresholds.
+/// Compute the hot and cold thresholds.
void ProfileSummaryInfo::computeThresholds() {
if (!Summary)
computeSummary();
@@ -125,10 +123,22 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
return ColdCountThreshold && C <= ColdCountThreshold.getValue();
}
-ProfileSummaryInfo *ProfileSummaryInfoWrapperPass::getPSI(Module &M) {
- if (!PSI)
- PSI.reset(new ProfileSummaryInfo(M));
- return PSI.get();
+bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(B);
+ if (Count && isHotCount(*Count))
+ return true;
+  // Use extractProfTotalWeight to get the BB count.
+  // For Sample PGO, BFI may not provide an accurate BB count due to errors
+  // magnified during sample count propagation. This serves as a backup plan
+  // to ensure that hot BBs are not missed.
+  // The query currently has false positives because branch instruction cloning
+  // does not update/scale branch weights. Unlike false negatives, this will
+  // not cause a performance problem.
+ uint64_t TotalCount;
+ if (B->getTerminator()->extractProfTotalWeight(TotalCount) &&
+ isHotCount(TotalCount))
+ return true;
+ return false;
}
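A hedged usage sketch of the new block-level query (assumes the caller already has BlockFrequencyInfo for F; NumHotBlocks is a hypothetical counter):

    unsigned NumHotBlocks = 0;
    for (const BasicBlock &BB : F)
      if (PSI.isHotBB(&BB, &BFI))
        ++NumHotBlocks;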
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
@@ -139,25 +149,33 @@ ProfileSummaryInfoWrapperPass::ProfileSummaryInfoWrapperPass()
initializeProfileSummaryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
-char ProfileSummaryAnalysis::PassID;
+bool ProfileSummaryInfoWrapperPass::doInitialization(Module &M) {
+ PSI.reset(new ProfileSummaryInfo(M));
+ return false;
+}
+
+bool ProfileSummaryInfoWrapperPass::doFinalization(Module &M) {
+ PSI.reset();
+ return false;
+}
+
+AnalysisKey ProfileSummaryAnalysis::Key;
ProfileSummaryInfo ProfileSummaryAnalysis::run(Module &M,
ModuleAnalysisManager &) {
return ProfileSummaryInfo(M);
}
-// FIXME: This only tests isHotFunction and isColdFunction and not the
-// isHotCount and isColdCount calls.
PreservedAnalyses ProfileSummaryPrinterPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
OS << "Functions in " << M.getName() << " with hot/cold annotations: \n";
for (auto &F : M) {
OS << F.getName();
- if (PSI.isHotFunction(&F))
- OS << " :hot ";
- else if (PSI.isColdFunction(&F))
- OS << " :cold ";
+ if (PSI.isFunctionEntryHot(&F))
+ OS << " :hot entry ";
+ else if (PSI.isFunctionEntryCold(&F))
+ OS << " :cold entry ";
OS << "\n";
}
return PreservedAnalyses::all();
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 6860a3e63953..8c084ddd2266 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -54,8 +54,7 @@ static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
clEnumValN(Region::PrintBB, "bb",
"print regions in detail with block_iterator"),
clEnumValN(Region::PrintRN, "rn",
- "print regions in detail with element_iterator"),
- clEnumValEnd));
+ "print regions in detail with element_iterator")));
//===----------------------------------------------------------------------===//
@@ -182,9 +181,9 @@ namespace llvm {
// RegionInfoAnalysis implementation
//
-char RegionInfoAnalysis::PassID;
+AnalysisKey RegionInfoAnalysis::Key;
-RegionInfo RegionInfoAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+RegionInfo RegionInfoAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
RegionInfo RI;
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
@@ -206,7 +205,7 @@ PreservedAnalyses RegionInfoPrinterPass::run(Function &F,
}
PreservedAnalyses RegionInfoVerifierPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
AM.getResult<RegionInfoAnalysis>(F).verifyAnalysis();
return PreservedAnalyses::all();
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 5e1cdd48a78e..7358aa6810a1 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -10,7 +10,7 @@
// This file implements RegionPass and RGPassManager. All region optimization
// and transformation passes are derived from RegionPass. RGPassManager is
// responsible for managing RegionPasses.
-// most of these codes are COPY from LoopPass.cpp
+// Most of this code has been COPIED from LoopPass.cpp
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
@@ -64,9 +64,7 @@ bool RGPassManager::runOnFunction(Function &F) {
return false;
// Initialization
- for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
- I != E; ++I) {
- Region *R = *I;
+ for (Region *R : RQ) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
RegionPass *RP = (RegionPass *)getContainedPass(Index);
Changed |= RP->doInitialization(R, *this);
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index e42a4b574d90..5e566bcdaff4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -61,6 +61,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -120,6 +122,16 @@ static cl::opt<bool>
cl::desc("Verify no dangling value in ScalarEvolution's "
"ExprValueMap (slow)"));
+static cl::opt<unsigned> MulOpsInlineThreshold(
+ "scev-mulops-inline-threshold", cl::Hidden,
+ cl::desc("Threshold for inlining multiplication operands into a SCEV"),
+ cl::init(1000));
+
+static cl::opt<unsigned>
+ MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive compare complexity"),
+ cl::init(32));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -447,180 +459,233 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
// SCEV Utilities
//===----------------------------------------------------------------------===//
-namespace {
-/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
-/// than the complexity of the RHS. This comparator is used to canonicalize
-/// expressions.
-class SCEVComplexityCompare {
- const LoopInfo *const LI;
-public:
- explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+/// Compare the two values \p LV and \p RV in terms of their "complexity" where
+/// "complexity" is a partial (and somewhat ad-hoc) relation used to order
+/// operands in SCEV expressions. \p EqCache is a set of pairs of values that
+/// have been previously deemed to be "equally complex" by this routine. It is
+/// intended to avoid exponential time complexity in cases like:
+///
+/// %a = f(%x, %y)
+/// %b = f(%a, %a)
+/// %c = f(%b, %b)
+///
+/// %d = f(%x, %y)
+/// %e = f(%d, %d)
+/// %f = f(%e, %e)
+///
+/// CompareValueComplexity(%f, %c)
+///
+/// Since we do not continue running this routine on expression trees once we
+/// have seen unequal values, there is no need to track them in the cache.
+static int
+CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
+ const LoopInfo *const LI, Value *LV, Value *RV,
+ unsigned Depth) {
+ if (Depth > MaxCompareDepth || EqCache.count({LV, RV}))
+ return 0;
+
+ // Order pointer values after integer values. This helps SCEVExpander form
+ // GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
- // Return true or false if LHS is less than, or at least RHS, respectively.
- bool operator()(const SCEV *LHS, const SCEV *RHS) const {
- return compare(LHS, RHS) < 0;
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(), RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const auto *LA = dyn_cast<Argument>(LV)) {
+ const auto *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
}
- // Return negative, zero, or positive, if LHS is less than, equal to, or
- // greater than RHS, respectively. A three-way result allows recursive
- // comparisons to be more efficient.
- int compare(const SCEV *LHS, const SCEV *RHS) const {
- // Fast-path: SCEVs are uniqued so we can do a quick equality check.
- if (LHS == RHS)
- return 0;
-
- // Primarily, sort the SCEVs by their getSCEVType().
- unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
- if (LType != RType)
- return (int)LType - (int)RType;
-
- // Aside from the getSCEVType() ordering, the particular ordering
- // isn't very important except that it's beneficial to be consistent,
- // so that (a + b) and (b + a) don't end up as different expressions.
- switch (static_cast<SCEVTypes>(LType)) {
- case scUnknown: {
- const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
- const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
-
- // Sort SCEVUnknown values with some loose heuristics. TODO: This is
- // not as complete as it could be.
- const Value *LV = LU->getValue(), *RV = RU->getValue();
-
- // Order pointer values after integer values. This helps SCEVExpander
- // form GEPs.
- bool LIsPointer = LV->getType()->isPointerTy(),
- RIsPointer = RV->getType()->isPointerTy();
- if (LIsPointer != RIsPointer)
- return (int)LIsPointer - (int)RIsPointer;
-
- // Compare getValueID values.
- unsigned LID = LV->getValueID(),
- RID = RV->getValueID();
- if (LID != RID)
- return (int)LID - (int)RID;
-
- // Sort arguments by their position.
- if (const Argument *LA = dyn_cast<Argument>(LV)) {
- const Argument *RA = cast<Argument>(RV);
- unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
- return (int)LArgNo - (int)RArgNo;
- }
+ if (const auto *LGV = dyn_cast<GlobalValue>(LV)) {
+ const auto *RGV = cast<GlobalValue>(RV);
- // For instructions, compare their loop depth, and their operand
- // count. This is pretty loose.
- if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
- const Instruction *RInst = cast<Instruction>(RV);
-
- // Compare loop depths.
- const BasicBlock *LParent = LInst->getParent(),
- *RParent = RInst->getParent();
- if (LParent != RParent) {
- unsigned LDepth = LI->getLoopDepth(LParent),
- RDepth = LI->getLoopDepth(RParent);
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
- }
+ const auto IsGVNameSemantic = [&](const GlobalValue *GV) {
+ auto LT = GV->getLinkage();
+ return !(GlobalValue::isPrivateLinkage(LT) ||
+ GlobalValue::isInternalLinkage(LT));
+ };
- // Compare the number of operands.
- unsigned LNumOps = LInst->getNumOperands(),
- RNumOps = RInst->getNumOperands();
- return (int)LNumOps - (int)RNumOps;
- }
+ // Use the names to distinguish the two values, but only if the
+ // names are semantically important.
+ if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV))
+ return LGV->getName().compare(RGV->getName());
+ }
+
+ // For instructions, compare their loop depth, and their operand count. This
+ // is pretty loose.
+ if (const auto *LInst = dyn_cast<Instruction>(LV)) {
+ const auto *RInst = cast<Instruction>(RV);
- return 0;
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
}
- case scConstant: {
- const SCEVConstant *LC = cast<SCEVConstant>(LHS);
- const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
- // Compare constant values.
- const APInt &LA = LC->getAPInt();
- const APInt &RA = RC->getAPInt();
- unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
- if (LBitWidth != RBitWidth)
- return (int)LBitWidth - (int)RBitWidth;
- return LA.ult(RA) ? -1 : 1;
+ for (unsigned Idx : seq(0u, LNumOps)) {
+ int Result =
+ CompareValueComplexity(EqCache, LI, LInst->getOperand(Idx),
+ RInst->getOperand(Idx), Depth + 1);
+ if (Result != 0)
+ return Result;
}
+ }
- case scAddRecExpr: {
- const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
- const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+ EqCache.insert({LV, RV});
+ return 0;
+}
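To make the comment's cost claim concrete: without the cache, comparing the two depth-n chains %c and %f satisfies T(n) = 2*T(n-1) + O(1), i.e. exponential blow-up, because both children of every f-node are the same value and get re-compared independently. With EqCache, each distinct (LV, RV) pair is fully compared at most once before being recorded as equal, so the same comparison does O(n) pair visits. The Depth cutoff is an independent safety net for deep but non-repeating trees; returning 0 there merely declares the two values equally complex, which loosens canonicalization without affecting correctness.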
- // Compare addrec loop depths.
- const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
- if (LLoop != RLoop) {
- unsigned LDepth = LLoop->getLoopDepth(),
- RDepth = RLoop->getLoopDepth();
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
- }
+// Return negative, zero, or positive, if LHS is less than, equal to, or greater
+// than RHS, respectively. A three-way result allows recursive comparisons to be
+// more efficient.
+static int CompareSCEVComplexity(
+ SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV,
+ const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS,
+ unsigned Depth = 0) {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
- // Addrec complexity grows with operand count.
- unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
- // Lexicographically compare.
- for (unsigned i = 0; i != LNumOps; ++i) {
- long X = compare(LA->getOperand(i), RA->getOperand(i));
- if (X != 0)
- return X;
- }
+ if (Depth > MaxCompareDepth || EqCacheSCEV.count({LHS, RHS}))
+ return 0;
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (static_cast<SCEVTypes>(LType)) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ SmallSet<std::pair<Value *, Value *>, 8> EqCache;
+ int X = CompareValueComplexity(EqCache, LI, LU->getValue(), RU->getValue(),
+ Depth + 1);
+ if (X == 0)
+ EqCacheSCEV.insert({LHS, RHS});
+ return X;
+ }
- return 0;
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getAPInt();
+ const APInt &RA = RC->getAPInt();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
}
- case scAddExpr:
- case scMulExpr:
- case scSMaxExpr:
- case scUMaxExpr: {
- const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
- const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
-
- // Lexicographically compare n-ary expressions.
- unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
-
- for (unsigned i = 0; i != LNumOps; ++i) {
- if (i >= RNumOps)
- return 1;
- long X = compare(LC->getOperand(i), RC->getOperand(i));
- if (X != 0)
- return X;
- }
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i),
+ RA->getOperand(i), Depth + 1);
+ if (X != 0)
+ return X;
}
+ EqCacheSCEV.insert({LHS, RHS});
+ return 0;
+ }
- case scUDivExpr: {
- const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
- const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
- // Lexicographically compare udiv expressions.
- long X = compare(LC->getLHS(), RC->getLHS());
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i),
+ RC->getOperand(i), Depth + 1);
if (X != 0)
return X;
- return compare(LC->getRHS(), RC->getRHS());
}
+ EqCacheSCEV.insert({LHS, RHS});
+ return 0;
+ }
- case scTruncate:
- case scZeroExtend:
- case scSignExtend: {
- const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
- const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
- // Compare cast expressions by operand.
- return compare(LC->getOperand(), RC->getOperand());
- }
+ // Lexicographically compare udiv expressions.
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(),
+ Depth + 1);
+ if (X != 0)
+ return X;
+ X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(),
+ Depth + 1);
+ if (X == 0)
+ EqCacheSCEV.insert({LHS, RHS});
+ return X;
+ }
- case scCouldNotCompute:
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- }
- llvm_unreachable("Unknown SCEV kind!");
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(),
+ RC->getOperand(), Depth + 1);
+ if (X == 0)
+ EqCacheSCEV.insert({LHS, RHS});
+ return X;
}
-};
-} // end anonymous namespace
+
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
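One consequence of the primary sort key is worth spelling out: getSCEVType() values come from the SCEVTypes enum, and as of this revision scConstant is its first enumerator, so constants always compare as least complex and migrate to the front of an operand list, which is what getAddExpr/getMulExpr assume when they peek at Ops[0] for folding. A minimal illustration (file-local names; assumes a ScalarEvolution SE, a LoopInfo LI, an LLVMContext Ctx, and any non-constant SCEV X in scope):

  SmallSet<std::pair<const SCEV *, const SCEV *>, 8> Cache;
  const SCEV *Two = SE.getConstant(Type::getInt32Ty(Ctx), 2);
  // scConstant orders before every other SCEV kind.
  assert(CompareSCEVComplexity(Cache, &LI, Two, X) < 0 && "constants sort first");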
/// Given a list of SCEV objects, order them by their complexity, and group
/// objects of the same complexity together by value. When this routine is
@@ -635,17 +700,22 @@ public:
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
LoopInfo *LI) {
if (Ops.size() < 2) return; // Noop
+
+ SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache;
if (Ops.size() == 2) {
// This is the common case, which also happens to be trivially simple.
// Special case it.
const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
- if (SCEVComplexityCompare(LI)(RHS, LHS))
+ if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0)
std::swap(LHS, RHS);
return;
}
// Do the rough sort by complexity.
- std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
+ std::stable_sort(Ops.begin(), Ops.end(),
+ [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) {
+ return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0;
+ });
// Now that we are sorted by complexity, group elements of the same
// complexity. Note that this is, at worst, N^2, but the vector is likely to
@@ -2518,6 +2588,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Idx < Ops.size()) {
bool DeletedMul = false;
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+ if (Ops.size() > MulOpsInlineThreshold)
+ break;
// If we have an mul, expand the mul operands onto the end of the operands
// list.
Ops.erase(Ops.begin()+Idx);
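For context on what the threshold caps: this loop flattens nested multiplies into one n-ary node, e.g. ((a * b) * (c * d)) becomes (a * b * c * d), erasing each nested mul and appending its operands onto Ops. Pathological inputs can make Ops balloon into the many thousands, after which the rest of getMulExpr, which rescans Ops repeatedly, dominates compile time. Past the 1000-operand default the loop simply stops inlining and keeps the remaining mul as a single opaque operand; the result is still a correct SCEV, just less aggressively canonicalized.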
@@ -2970,9 +3042,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
}
const SCEV *
-ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
- const SmallVectorImpl<const SCEV *> &IndexExprs,
- bool InBounds) {
+ScalarEvolution::getGEPExpr(GEPOperator *GEP,
+ const SmallVectorImpl<const SCEV *> &IndexExprs) {
+ const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
// getSCEV(Base)->getType() has the same address space as Base->getType()
// because SCEV::getType() preserves the address space.
Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
@@ -2981,12 +3053,13 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
// flow and the no-overflow bits may not be valid for the expression in any
// context. This can be fixed similarly to how these flags are handled for
// adds.
- SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+ SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
+ : SCEV::FlagAnyWrap;
const SCEV *TotalOffset = getZero(IntPtrTy);
- // The address space is unimportant. The first thing we do on CurTy is getting
+ // The array size is unimportant. The first thing we do on CurTy is getting
// its element type.
- Type *CurTy = PointerType::getUnqual(PointeeType);
+ Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0);
for (const SCEV *IndexExpr : IndexExprs) {
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(CurTy)) {
@@ -3311,75 +3384,47 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
-
bool ScalarEvolution::checkValidity(const SCEV *S) const {
- // Helper class working with SCEVTraversal to figure out if a SCEV contains
- // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
- // is set iff if find such SCEVUnknown.
- //
- struct FindInvalidSCEVUnknown {
- bool FindOne;
- FindInvalidSCEVUnknown() { FindOne = false; }
- bool follow(const SCEV *S) {
- switch (static_cast<SCEVTypes>(S->getSCEVType())) {
- case scConstant:
- return false;
- case scUnknown:
- if (!cast<SCEVUnknown>(S)->getValue())
- FindOne = true;
- return false;
- default:
- return true;
- }
- }
- bool isDone() const { return FindOne; }
- };
-
- FindInvalidSCEVUnknown F;
- SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
- ST.visitAll(S);
+ bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
+ auto *SU = dyn_cast<SCEVUnknown>(S);
+ return SU && SU->getValue() == nullptr;
+ });
- return !F.FindOne;
-}
-
-namespace {
-// Helper class working with SCEVTraversal to figure out if a SCEV contains
-// a sub SCEV of scAddRecExpr type. FindInvalidSCEVUnknown::FoundOne is set
-// iff if such sub scAddRecExpr type SCEV is found.
-struct FindAddRecurrence {
- bool FoundOne;
- FindAddRecurrence() : FoundOne(false) {}
-
- bool follow(const SCEV *S) {
- switch (static_cast<SCEVTypes>(S->getSCEVType())) {
- case scAddRecExpr:
- FoundOne = true;
- case scConstant:
- case scUnknown:
- case scCouldNotCompute:
- return false;
- default:
- return true;
- }
- }
- bool isDone() const { return FoundOne; }
-};
+ return !ContainsNulls;
}
bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
- HasRecMapType::iterator I = HasRecMap.find_as(S);
+ HasRecMapType::iterator I = HasRecMap.find(S);
if (I != HasRecMap.end())
return I->second;
- FindAddRecurrence F;
- SCEVTraversal<FindAddRecurrence> ST(F);
- ST.visitAll(S);
- HasRecMap.insert({S, F.FoundOne});
- return F.FoundOne;
+ bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>);
+ HasRecMap.insert({S, FoundAddRec});
+ return FoundAddRec;
+}
+
+/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
+/// If \p S is a two-operand SCEVAddExpr of the form (I + S') where I is a
+/// constant offset, return {S', I}; otherwise return {\p S, nullptr}.
+static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
+ const auto *Add = dyn_cast<SCEVAddExpr>(S);
+ if (!Add)
+ return {S, nullptr};
+
+ if (Add->getNumOperands() != 2)
+ return {S, nullptr};
+
+ auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
+ if (!ConstOp)
+ return {S, nullptr};
+
+ return {Add->getOperand(1), ConstOp->getValue()};
}
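A quick usage sketch (hypothetical call site; a ScalarEvolution SE and an i32 Value *X are assumed in scope). Because complexity ordering puts constants first, the constant of a two-operand add sits at operand 0, which is the only shape this helper matches:

  // S is the canonical add expression (5 + %x).
  const SCEV *S = SE.getAddExpr(SE.getConstant(X->getType(), 5),
                                SE.getUnknown(X));
  const SCEV *Stripped;
  ConstantInt *Offset;
  std::tie(Stripped, Offset) = splitAddExpr(S);
  // Now Stripped is the SCEV for %x and Offset is i32 5, whereas
  // splitAddExpr(SE.getUnknown(X)) would return {%x, nullptr}.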
-/// Return the Value set from S.
-SetVector<Value *> *ScalarEvolution::getSCEVValues(const SCEV *S) {
+/// Return the ValueOffsetPair set for \p S. \p S can be represented
+/// by the value and offset from any ValueOffsetPair in the set.
+SetVector<ScalarEvolution::ValueOffsetPair> *
+ScalarEvolution::getSCEVValues(const SCEV *S) {
ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
if (SI == ExprValueMap.end())
return nullptr;
@@ -3387,24 +3432,31 @@ SetVector<Value *> *ScalarEvolution::getSCEVValues(const SCEV *S) {
if (VerifySCEVMap) {
// Check there is no dangling Value in the set returned.
for (const auto &VE : SI->second)
- assert(ValueExprMap.count(VE));
+ assert(ValueExprMap.count(VE.first));
}
#endif
return &SI->second;
}
-/// Erase Value from ValueExprMap and ExprValueMap. If ValueExprMap.erase(V) is
-/// not used together with forgetMemoizedResults(S), eraseValueFromMap should be
-/// used instead to ensure whenever V->S is removed from ValueExprMap, V is also
-/// removed from the set of ExprValueMap[S].
+/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
+/// must not be called on its own; use eraseValueFromMap instead, so that V
+/// is removed from ValueExprMap and ExprValueMap at the same time.
void ScalarEvolution::eraseValueFromMap(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
- SetVector<Value *> *SV = getSCEVValues(S);
- // Remove V from the set of ExprValueMap[S]
- if (SV)
- SV->remove(V);
+ // Remove {V, 0} from the set of ExprValueMap[S]
+ if (SetVector<ValueOffsetPair> *SV = getSCEVValues(S))
+ SV->remove({V, nullptr});
+
+ // Remove {V, Offset} from the set of ExprValueMap[Stripped]
+ const SCEV *Stripped;
+ ConstantInt *Offset;
+ std::tie(Stripped, Offset) = splitAddExpr(S);
+ if (Offset != nullptr) {
+ if (SetVector<ValueOffsetPair> *SV = getSCEVValues(Stripped))
+ SV->remove({V, Offset});
+ }
ValueExprMap.erase(V);
}
}
@@ -3419,11 +3471,26 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
S = createSCEV(V);
// During PHI resolution, it is possible to create two SCEVs for the same
// V, so it is necessary to double-check whether V->S is inserted into
- // ValueExprMap before insert S->V into ExprValueMap.
+ // ValueExprMap before inserting S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- if (Pair.second)
- ExprValueMap[S].insert(V);
+ if (Pair.second) {
+ ExprValueMap[S].insert({V, nullptr});
+
+ // If S == Stripped + Offset, add Stripped -> {V, Offset} into
+ // ExprValueMap.
+ const SCEV *Stripped = S;
+ ConstantInt *Offset = nullptr;
+ std::tie(Stripped, Offset) = splitAddExpr(S);
+ // If Stripped is a SCEVUnknown, don't bother to save
+ // Stripped -> {V, Offset}: it doesn't simplify anything and can even
+ // increase the complexity of the expansion code.
+ // If V is a GetElementPtrInst, don't save Stripped -> {V, Offset}
+ // because the expansion may then generate add/sub instead of a GEP.
+ if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
+ !isa<GetElementPtrInst>(V))
+ ExprValueMap[Stripped].insert({V, Offset});
+ }
}
return S;
}
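The payoff of the extra entries comes at expansion time. Reading the ValueOffsetPair convention off this code (an inference, not spelled out in the patch): an entry {V, Offset} under key Stripped means Stripped's value can be recovered as V minus Offset, so if %v already computes (%x + 4), the bare expression %x can be materialized by reusing %v instead of re-expanding %x from scratch. The two carve-outs keep this profitable: offsets against a bare SCEVUnknown buy nothing, and recording GEP-produced values can make the expander emit integer add/sub where it would otherwise emit a GEP.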
@@ -3436,8 +3503,8 @@ const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
const SCEV *S = I->second;
if (checkValidity(S))
return S;
+ eraseValueFromMap(V);
forgetMemoizedResults(S);
- ValueExprMap.erase(I);
}
return nullptr;
}
@@ -3675,8 +3742,8 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
if (!isa<PHINode>(I) ||
!isa<SCEVUnknown>(Old) ||
(I != PN && Old == SymName)) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
- ValueExprMap.erase(It);
}
}
@@ -4055,7 +4122,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// to create an AddRecExpr for this PHI node. We can not keep this temporary
// as it will prevent later (possibly simpler) SCEV expressions to be added
// to the ValueExprMap.
- ValueExprMap.erase(PN);
+ eraseValueFromMap(PN);
}
return nullptr;
@@ -4168,7 +4235,9 @@ static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
}
const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
- if (PN->getNumIncomingValues() == 2) {
+ auto IsReachable =
+ [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
+ if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
const Loop *L = LI.getLoopFor(PN->getParent());
// We don't want to break LCSSA, even in a SCEV expression tree.
@@ -4244,7 +4313,7 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
// a >s b ? a+x : b+x -> smax(a, b)+x
@@ -4267,7 +4336,7 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
// a >u b ? a+x : b+x -> umax(a, b)+x
@@ -4332,9 +4401,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
SmallVector<const SCEV *, 4> IndexExprs;
for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
IndexExprs.push_back(getSCEV(*Index));
- return getGEPExpr(GEP->getSourceElementType(),
- getSCEV(GEP->getPointerOperand()),
- IndexExprs, GEP->isInBounds());
+ return getGEPExpr(GEP, IndexExprs);
}
uint32_t
@@ -4612,19 +4679,18 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange ZExtMaxBECountRange =
- MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtMaxBECountRange = MaxBECountRange.zextOrTrunc(BitWidth * 2);
ConstantRange StepSRange = getSignedRange(Step);
- ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2);
ConstantRange StartURange = getUnsignedRange(Start);
ConstantRange EndURange =
StartURange.add(MaxBECountRange.multiply(StepSRange));
// Check for unsigned overflow.
- ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2 + 1);
- ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2);
+ ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2);
if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
ZExtEndURange) {
APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
@@ -4644,8 +4710,8 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
// Check for signed overflow. This must be done with ConstantRange
// arithmetic because we could be called from within the ScalarEvolution
// overflow checking code.
- ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2 + 1);
- ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2);
+ ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2);
if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
SExtEndSRange) {
APInt Min =
@@ -4909,17 +4975,33 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
}
-bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
- auto Itr = LoopHasNoAbnormalExits.find(L);
- if (Itr == LoopHasNoAbnormalExits.end()) {
- auto NoAbnormalExitInBB = [&](BasicBlock *BB) {
- return all_of(*BB, [](Instruction &I) {
- return isGuaranteedToTransferExecutionToSuccessor(&I);
- });
+ScalarEvolution::LoopProperties
+ScalarEvolution::getLoopProperties(const Loop *L) {
+ typedef ScalarEvolution::LoopProperties LoopProperties;
+
+ auto Itr = LoopPropertiesCache.find(L);
+ if (Itr == LoopPropertiesCache.end()) {
+ auto HasSideEffects = [](Instruction *I) {
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ return !SI->isSimple();
+
+ return I->mayHaveSideEffects();
};
- auto InsertPair = LoopHasNoAbnormalExits.insert(
- {L, all_of(L->getBlocks(), NoAbnormalExitInBB)});
+ LoopProperties LP = {/* HasNoAbnormalExits */ true,
+ /*HasNoSideEffects*/ true};
+
+ for (auto *BB : L->getBlocks())
+ for (auto &I : *BB) {
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ LP.HasNoAbnormalExits = false;
+ if (HasSideEffects(&I))
+ LP.HasNoSideEffects = false;
+ if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
+ break; // We're already as pessimistic as we can get.
+ }
+
+ auto InsertPair = LoopPropertiesCache.insert({L, LP});
assert(InsertPair.second && "We just checked!");
Itr = InsertPair.first;
}
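Both properties are now computed in a single block walk and memoized per loop, with the early break firing once both answers are pessimistic. Presumably the public queries stay as thin wrappers over this cache, along these lines (a sketch of the accessor shape, not necessarily verbatim from the header):

  bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
    return getLoopProperties(L).HasNoAbnormalExits;
  }
  bool ScalarEvolution::loopHasNoSideEffects(const Loop *L) {
    return getLoopProperties(L).HasNoSideEffects;
  }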
@@ -5247,6 +5329,20 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
+static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
+ if (!ExitCount)
+ return 0;
+
+ ConstantInt *ExitConst = ExitCount->getValue();
+
+ // Guard against huge trip counts.
+ if (ExitConst->getValue().getActiveBits() > 32)
+ return 0;
+
+ // In case of integer overflow, this returns 0, which is correct.
+ return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
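Two concrete data points for this helper: an exit count of 7 (the backedge is taken seven times) yields a trip count of 8, the number of header executions. The overflow remark is load-bearing: an exit count of 2^32 - 1 passes the 32-active-bit guard, but the trailing +1 wraps the unsigned result to 0, which callers already read as "no constant trip count known", so the wrap is benign by construction.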
+
unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
if (BasicBlock *ExitingBB = L->getExitingBlock())
return getSmallConstantTripCount(L, ExitingBB);
@@ -5262,17 +5358,13 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
"Exiting block must actually branch out of the loop!");
const SCEVConstant *ExitCount =
dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
- if (!ExitCount)
- return 0;
-
- ConstantInt *ExitConst = ExitCount->getValue();
-
- // Guard against huge trip counts.
- if (ExitConst->getValue().getActiveBits() > 32)
- return 0;
+ return getConstantTripCount(ExitCount);
+}
- // In case of integer overflow, this returns 0, which is correct.
- return ((unsigned)ExitConst->getZExtValue()) + 1;
+unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) {
+ const auto *MaxExitCount =
+ dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L));
+ return getConstantTripCount(MaxExitCount);
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
@@ -5351,6 +5443,10 @@ const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).getMax(this);
}
+bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
+ return getBackedgeTakenInfo(L).isMaxOrZero(this);
+}
+
/// Push PHI nodes in the header of the given loop onto the given Worklist.
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
@@ -5376,7 +5472,7 @@ ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
BackedgeTakenInfo Result =
computeBackedgeTakenCount(L, /*AllowPredicates=*/true);
- return PredicatedBackedgeTakenCounts.find(L)->second = Result;
+ return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
}
const ScalarEvolution::BackedgeTakenInfo &
@@ -5435,8 +5531,8 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// case, createNodeForPHI will perform the necessary updates on its
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
- ValueExprMap.erase(It);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
@@ -5451,7 +5547,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// recusive call to getBackedgeTakenInfo (on a different
// loop), which would invalidate the iterator computed
// earlier.
- return BackedgeTakenCounts.find(L)->second = Result;
+ return BackedgeTakenCounts.find(L)->second = std::move(Result);
}
void ScalarEvolution::forgetLoop(const Loop *L) {
@@ -5481,8 +5577,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
- ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
@@ -5495,7 +5591,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
for (Loop *I : *L)
forgetLoop(I);
- LoopHasNoAbnormalExits.erase(L);
+ LoopPropertiesCache.erase(L);
}
void ScalarEvolution::forgetValue(Value *V) {
@@ -5515,8 +5611,8 @@ void ScalarEvolution::forgetValue(Value *V) {
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
- ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
@@ -5534,14 +5630,11 @@ void ScalarEvolution::forgetValue(Value *V) {
/// caller's responsibility to specify the relevant loop exit using
/// getExact(ExitingBlock, SE).
const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getExact(
- ScalarEvolution *SE, SCEVUnionPredicate *Preds) const {
+ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE,
+ SCEVUnionPredicate *Preds) const {
// If any exits were not computable, the loop is not computable.
- if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
-
- // We need exactly one computable exit.
- if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
- assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
+ if (!isComplete() || ExitNotTaken.empty())
+ return SE->getCouldNotCompute();
const SCEV *BECount = nullptr;
for (auto &ENT : ExitNotTaken) {
@@ -5551,10 +5644,10 @@ ScalarEvolution::BackedgeTakenInfo::getExact(
BECount = ENT.ExactNotTaken;
else if (BECount != ENT.ExactNotTaken)
return SE->getCouldNotCompute();
- if (Preds && ENT.getPred())
- Preds->add(ENT.getPred());
+ if (Preds && !ENT.hasAlwaysTruePredicate())
+ Preds->add(ENT.Predicate.get());
- assert((Preds || ENT.hasAlwaysTruePred()) &&
+ assert((Preds || ENT.hasAlwaysTruePredicate()) &&
"Predicate should be always true!");
}
@@ -5567,7 +5660,7 @@ const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
for (auto &ENT : ExitNotTaken)
- if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePred())
+ if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.ExactNotTaken;
return SE->getCouldNotCompute();
@@ -5576,21 +5669,29 @@ ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
/// getMax - Get the max backedge taken count for the loop.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
- for (auto &ENT : ExitNotTaken)
- if (!ENT.hasAlwaysTruePred())
- return SE->getCouldNotCompute();
+ auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
+ return !ENT.hasAlwaysTruePredicate();
+ };
- return Max ? Max : SE->getCouldNotCompute();
+ if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax())
+ return SE->getCouldNotCompute();
+
+ return getMax();
+}
+
+bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const {
+ auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
+ return !ENT.hasAlwaysTruePredicate();
+ };
+ return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
}
bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
ScalarEvolution *SE) const {
- if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
+ if (getMax() && getMax() != SE->getCouldNotCompute() &&
+ SE->hasOperand(getMax(), S))
return true;
- if (!ExitNotTaken.ExitingBlock)
- return false;
-
for (auto &ENT : ExitNotTaken)
if (ENT.ExactNotTaken != SE->getCouldNotCompute() &&
SE->hasOperand(ENT.ExactNotTaken, S))
@@ -5602,62 +5703,31 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
- SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete, const SCEV *MaxCount)
- : Max(MaxCount) {
-
- if (!Complete)
- ExitNotTaken.setIncomplete();
-
- unsigned NumExits = ExitCounts.size();
- if (NumExits == 0) return;
-
- ExitNotTaken.ExitingBlock = ExitCounts[0].ExitBlock;
- ExitNotTaken.ExactNotTaken = ExitCounts[0].Taken;
-
- // Determine the number of ExitNotTakenExtras structures that we need.
- unsigned ExtraInfoSize = 0;
- if (NumExits > 1)
- ExtraInfoSize = 1 + std::count_if(std::next(ExitCounts.begin()),
- ExitCounts.end(), [](EdgeInfo &Entry) {
- return !Entry.Pred.isAlwaysTrue();
- });
- else if (!ExitCounts[0].Pred.isAlwaysTrue())
- ExtraInfoSize = 1;
-
- ExitNotTakenExtras *ENT = nullptr;
-
- // Allocate the ExitNotTakenExtras structures and initialize the first
- // element (ExitNotTaken).
- if (ExtraInfoSize > 0) {
- ENT = new ExitNotTakenExtras[ExtraInfoSize];
- ExitNotTaken.ExtraInfo = &ENT[0];
- *ExitNotTaken.getPred() = std::move(ExitCounts[0].Pred);
- }
-
- if (NumExits == 1)
- return;
-
- assert(ENT && "ExitNotTakenExtras is NULL while having more than one exit");
-
- auto &Exits = ExitNotTaken.ExtraInfo->Exits;
-
- // Handle the rare case of multiple computable exits.
- for (unsigned i = 1, PredPos = 1; i < NumExits; ++i) {
- ExitNotTakenExtras *Ptr = nullptr;
- if (!ExitCounts[i].Pred.isAlwaysTrue()) {
- Ptr = &ENT[PredPos++];
- Ptr->Pred = std::move(ExitCounts[i].Pred);
- }
-
- Exits.emplace_back(ExitCounts[i].ExitBlock, ExitCounts[i].Taken, Ptr);
- }
+ SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
+ &&ExitCounts,
+ bool Complete, const SCEV *MaxCount, bool MaxOrZero)
+ : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
+ typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
+ ExitNotTaken.reserve(ExitCounts.size());
+ std::transform(
+ ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken),
+ [&](const EdgeExitInfo &EEI) {
+ BasicBlock *ExitBB = EEI.first;
+ const ExitLimit &EL = EEI.second;
+ if (EL.Predicates.empty())
+ return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr);
+
+ std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
+ for (auto *Pred : EL.Predicates)
+ Predicate->add(Pred);
+
+ return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate));
+ });
}
/// Invalidate this result and free the ExitNotTakenInfo array.
void ScalarEvolution::BackedgeTakenInfo::clear() {
- ExitNotTaken.ExitingBlock = nullptr;
- ExitNotTaken.ExactNotTaken = nullptr;
- delete[] ExitNotTaken.ExtraInfo;
+ ExitNotTaken.clear();
}
/// Compute the number of times the backedge of the specified loop will execute.
@@ -5667,11 +5737,14 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- SmallVector<EdgeInfo, 4> ExitCounts;
+ typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
+
+ SmallVector<EdgeExitInfo, 4> ExitCounts;
bool CouldComputeBECount = true;
BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
const SCEV *MustExitMaxBECount = nullptr;
const SCEV *MayExitMaxBECount = nullptr;
+ bool MustExitMaxOrZero = false;
// Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
// and compute maxBECount.
@@ -5680,17 +5753,17 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
BasicBlock *ExitBB = ExitingBlocks[i];
ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
- assert((AllowPredicates || EL.Pred.isAlwaysTrue()) &&
+ assert((AllowPredicates || EL.Predicates.empty()) &&
"Predicated exit limit when predicates are not allowed!");
// 1. For each exit that can be computed, add an entry to ExitCounts.
// CouldComputeBECount is true only if all exits can be computed.
- if (EL.Exact == getCouldNotCompute())
+ if (EL.ExactNotTaken == getCouldNotCompute())
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
CouldComputeBECount = false;
else
- ExitCounts.emplace_back(EdgeInfo(ExitBB, EL.Exact, EL.Pred));
+ ExitCounts.emplace_back(ExitBB, EL);
// 2. Derive the loop's MaxBECount from each exit's max number of
// non-exiting iterations. Partition the loop exits into two kinds:
@@ -5698,29 +5771,35 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
//
// If the exit dominates the loop latch, it is a LoopMustExit otherwise it
// is a LoopMayExit. If any computable LoopMustExit is found, then
- // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
- // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
- // considered greater than any computable EL.Max.
- if (EL.Max != getCouldNotCompute() && Latch &&
+ // MaxBECount is the minimum EL.MaxNotTaken of computable
+ // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum
+ // EL.MaxNotTaken, where CouldNotCompute is considered greater than any
+ // computable EL.MaxNotTaken.
+ if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
DT.dominates(ExitBB, Latch)) {
- if (!MustExitMaxBECount)
- MustExitMaxBECount = EL.Max;
- else {
+ if (!MustExitMaxBECount) {
+ MustExitMaxBECount = EL.MaxNotTaken;
+ MustExitMaxOrZero = EL.MaxOrZero;
+ } else {
MustExitMaxBECount =
- getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
+ getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
}
} else if (MayExitMaxBECount != getCouldNotCompute()) {
- if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
- MayExitMaxBECount = EL.Max;
+ if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute())
+ MayExitMaxBECount = EL.MaxNotTaken;
else {
MayExitMaxBECount =
- getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
+ getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken);
}
}
}
const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
(MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
- return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
+ // The loop backedge is taken either exactly MaxBECount times or zero
+ // times only when there is a single exiting block and that exit itself
+ // is taken either its maximum or zero times.
+ bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
+ return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
+ MaxBECount, MaxOrZero);
}
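A worked instance of the must/may split above: suppose a loop has exiting blocks A with MaxNotTaken = 10 and B with MaxNotTaken = 20. If A dominates the latch, every completed iteration had to survive A's test, so the backedge runs at most 10 times; must-exits therefore combine with umin. If neither block dominates the latch, an iteration may bypass either test and only umax(10, 20) = 20 is safe, with CouldNotCompute acting as plus infinity. The new MaxOrZero bit is forwarded only when there is exactly one exiting block, since "max or zero" for one of several exits says nothing about the loop as a whole.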
ScalarEvolution::ExitLimit
@@ -5825,39 +5904,40 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
if (EitherMayExit) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
- if (EL0.Exact == getCouldNotCompute() ||
- EL1.Exact == getCouldNotCompute())
+ if (EL0.ExactNotTaken == getCouldNotCompute() ||
+ EL1.ExactNotTaken == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
- if (EL0.Max == getCouldNotCompute())
- MaxBECount = EL1.Max;
- else if (EL1.Max == getCouldNotCompute())
- MaxBECount = EL0.Max;
+ BECount =
+ getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
+ if (EL0.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL1.MaxNotTaken;
+ else if (EL1.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL0.MaxNotTaken;
else
- MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ MaxBECount =
+ getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
} else {
// Both conditions must be true at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(FBB) && "Loop block has no successor in loop!");
- if (EL0.Max == EL1.Max)
- MaxBECount = EL0.Max;
- if (EL0.Exact == EL1.Exact)
- BECount = EL0.Exact;
+ if (EL0.MaxNotTaken == EL1.MaxNotTaken)
+ MaxBECount = EL0.MaxNotTaken;
+ if (EL0.ExactNotTaken == EL1.ExactNotTaken)
+ BECount = EL0.ExactNotTaken;
}
- SCEVUnionPredicate NP;
- NP.add(&EL0.Pred);
- NP.add(&EL1.Pred);
// There are cases (e.g. PR26207) where computeExitLimitFromCond is able
// to be more aggressive when computing BECount than when computing
- // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact
- // to match, but for EL0.Max and EL1.Max to not.
+ // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
+ // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
+ // to not.
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, NP);
+ return ExitLimit(BECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
}
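For intuition on the EitherMayExit arm: with a condition like (a && b) the loop runs only while both hold, so it stops at the first failure; if a alone would permit 5 backedges and b alone 7, together they permit umin(5, 7) = 5. The final clamp reflects the PR26207 asymmetry called out in the comment: the exact computation can succeed where the max computation gives up, and a known exact count is itself a valid upper bound, so MaxBECount is set to BECount in that case.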
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
@@ -5873,31 +5953,31 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
if (EitherMayExit) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
- if (EL0.Exact == getCouldNotCompute() ||
- EL1.Exact == getCouldNotCompute())
+ if (EL0.ExactNotTaken == getCouldNotCompute() ||
+ EL1.ExactNotTaken == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
- if (EL0.Max == getCouldNotCompute())
- MaxBECount = EL1.Max;
- else if (EL1.Max == getCouldNotCompute())
- MaxBECount = EL0.Max;
+ BECount =
+ getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
+ if (EL0.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL1.MaxNotTaken;
+ else if (EL1.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL0.MaxNotTaken;
else
- MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ MaxBECount =
+ getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
} else {
// Both conditions must be false at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(TBB) && "Loop block has no successor in loop!");
- if (EL0.Max == EL1.Max)
- MaxBECount = EL0.Max;
- if (EL0.Exact == EL1.Exact)
- BECount = EL0.Exact;
+ if (EL0.MaxNotTaken == EL1.MaxNotTaken)
+ MaxBECount = EL0.MaxNotTaken;
+ if (EL0.ExactNotTaken == EL1.ExactNotTaken)
+ BECount = EL0.ExactNotTaken;
}
- SCEVUnionPredicate NP;
- NP.add(&EL0.Pred);
- NP.add(&EL1.Pred);
- return ExitLimit(BECount, MaxBECount, NP);
+ return ExitLimit(BECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
}
}
@@ -5979,8 +6059,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
if (AddRec->getLoop() == L) {
// Form the constant range.
- ConstantRange CompRange(
- ICmpInst::makeConstantRange(Cond, RHSC->getAPInt()));
+ ConstantRange CompRange =
+ ConstantRange::makeExactICmpRegion(Cond, RHSC->getAPInt());
const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
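makeExactICmpRegion(Pred, C) produces exactly the set of values x satisfying "x Pred C", which is the contract getNumIterationsInRange wants. Two examples on i32 (illustrative, not from this patch):

  // {x : x u< 8} is the half-open range [0, 8)
  ConstantRange R1 = ConstantRange::makeExactICmpRegion(
      ICmpInst::ICMP_ULT, APInt(32, 8));
  // {x : x s> -1} is [0, 2^31), i.e. all non-negative i32 values
  ConstantRange R2 = ConstantRange::makeExactICmpRegion(
      ICmpInst::ICMP_SGT, APInt(32, -1, /*isSigned=*/true));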
@@ -6184,7 +6264,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
// %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
// %iv.shifted = lshr i32 %iv, <positive constant>
//
- // Return true on a succesful match. Return the corresponding PHI node (%iv
+ // Return true on a successful match. Return the corresponding PHI node (%iv
// above) in PNOut and the opcode of the shift operation in OpCodeOut.
auto MatchShiftRecurrence =
[&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
@@ -6282,8 +6362,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *UpperBound =
getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
- SCEVUnionPredicate P;
- return ExitLimit(getCouldNotCompute(), UpperBound, P);
+ return ExitLimit(getCouldNotCompute(), UpperBound, false);
}
return getCouldNotCompute();
@@ -7044,7 +7123,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// effectively V != 0. We know and take advantage of the fact that this
// expression is only ever used in a comparison-against-zero context.
- SCEVUnionPredicate P;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -7057,7 +7136,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
- AddRec = convertSCEVToAddRecWithPredicates(V, L, P);
+ AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
@@ -7079,7 +7158,8 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// should not accept a root of 2.
const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
if (Val->isZero())
- return ExitLimit(R1, R1, P); // We found a quadratic root!
+ // We found a quadratic root!
+ return ExitLimit(R1, R1, false, Predicates);
}
}
return getCouldNotCompute();
@@ -7136,7 +7216,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
else
MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
: -CR.getUnsignedMin());
- return ExitLimit(Distance, MaxBECount, P);
+ return ExitLimit(Distance, MaxBECount, false, Predicates);
}
// As a special case, handle the instance where Step is a positive power of
@@ -7191,7 +7271,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
const SCEV *Limit =
getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
- return ExitLimit(Limit, Limit, P);
+ return ExitLimit(Limit, Limit, false, Predicates);
}
}
@@ -7204,14 +7284,14 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
loopHasNoAbnormalExits(AddRec->getLoop())) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact, P);
+ return ExitLimit(Exact, Exact, false, Predicates);
}
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
const SCEV *E = SolveLinEquationWithOverflow(
StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
- return ExitLimit(E, E, P);
+ return ExitLimit(E, E, false, Predicates);
}
return getCouldNotCompute();
}
@@ -7323,149 +7403,77 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// cases, and canonicalize *-or-equal comparisons to regular comparisons.
if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
const APInt &RA = RC->getAPInt();
- switch (Pred) {
- default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_NE:
- // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
- if (!RA)
- if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
- if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
- if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
- ME->getOperand(0)->isAllOnesValue()) {
- RHS = AE->getOperand(1);
- LHS = ME->getOperand(1);
- Changed = true;
- }
- break;
- case ICmpInst::ICMP_UGE:
- if ((RA - 1).isMinValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- }
- if (RA.isMaxValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
- break;
- }
- if (RA.isMinValue()) goto trivially_true;
- Pred = ICmpInst::ICMP_UGT;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_ULE:
- if ((RA + 1).isMaxValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA + 1);
- Changed = true;
- break;
- }
- if (RA.isMinValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
- break;
- }
- if (RA.isMaxValue()) goto trivially_true;
+ bool SimplifiedByConstantRange = false;
- Pred = ICmpInst::ICMP_ULT;
- RHS = getConstant(RA + 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_SGE:
- if ((RA - 1).isMinSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- }
- if (RA.isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
- break;
+ if (!ICmpInst::isEquality(Pred)) {
+ ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
+ if (ExactCR.isFullSet())
+ goto trivially_true;
+ else if (ExactCR.isEmptySet())
+ goto trivially_false;
+
+ APInt NewRHS;
+ CmpInst::Predicate NewPred;
+ if (ExactCR.getEquivalentICmp(NewPred, NewRHS) &&
+ ICmpInst::isEquality(NewPred)) {
+ // We were able to convert an inequality to an equality.
+ Pred = NewPred;
+ RHS = getConstant(NewRHS);
+ Changed = SimplifiedByConstantRange = true;
}
- if (RA.isMinSignedValue()) goto trivially_true;
+ }
- Pred = ICmpInst::ICMP_SGT;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_SLE:
- if ((RA + 1).isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA + 1);
- Changed = true;
+ if (!SimplifiedByConstantRange) {
+ switch (Pred) {
+ default:
break;
- }
- if (RA.isMinSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
+ if (!RA)
+ if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
+ if (const SCEVMulExpr *ME =
+ dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
+ if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
+ ME->getOperand(0)->isAllOnesValue()) {
+ RHS = AE->getOperand(1);
+ LHS = ME->getOperand(1);
+ Changed = true;
+ }
break;
- }
- if (RA.isMaxSignedValue()) goto trivially_true;
- Pred = ICmpInst::ICMP_SLT;
- RHS = getConstant(RA + 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_UGT:
- if (RA.isMinValue()) {
- Pred = ICmpInst::ICMP_NE;
+
+ // The "Should have been caught earlier!" messages refer to the fact
+ // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
+ // should have fired on the corresponding cases, and canonicalized the
+ // check to trivially_true or trivially_false.
+
+ case ICmpInst::ICMP_UGE:
+ assert(!RA.isMinValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_UGT;
+ RHS = getConstant(RA - 1);
Changed = true;
break;
- }
- if ((RA + 1).isMaxValue()) {
- Pred = ICmpInst::ICMP_EQ;
+ case ICmpInst::ICMP_ULE:
+ assert(!RA.isMaxValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_ULT;
RHS = getConstant(RA + 1);
Changed = true;
break;
- }
- if (RA.isMaxValue()) goto trivially_false;
- break;
- case ICmpInst::ICMP_ULT:
- if (RA.isMaxValue()) {
- Pred = ICmpInst::ICMP_NE;
- Changed = true;
- break;
- }
- if ((RA - 1).isMinValue()) {
- Pred = ICmpInst::ICMP_EQ;
+ case ICmpInst::ICMP_SGE:
+ assert(!RA.isMinSignedValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_SGT;
RHS = getConstant(RA - 1);
Changed = true;
break;
- }
- if (RA.isMinValue()) goto trivially_false;
- break;
- case ICmpInst::ICMP_SGT:
- if (RA.isMinSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- Changed = true;
- break;
- }
- if ((RA + 1).isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
+ case ICmpInst::ICMP_SLE:
+ assert(!RA.isMaxSignedValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_SLT;
RHS = getConstant(RA + 1);
Changed = true;
break;
}
- if (RA.isMaxSignedValue()) goto trivially_false;
- break;
- case ICmpInst::ICMP_SLT:
- if (RA.isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- Changed = true;
- break;
- }
- if ((RA - 1).isMinSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- }
- if (RA.isMinSignedValue()) goto trivially_false;
- break;
}
}
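The constant-range rewrite above subsumes the boundary cases that used to be hand-rolled per predicate. For instance, ICMP_ULT with RA = 1 has the exact region {0}, which getEquivalentICmp reports as ICMP_EQ 0; symmetrically, ICMP_UGT with RA = UINT_MAX - 1 becomes ICMP_EQ UINT_MAX, and ICMP_UGE with RA = 1 becomes ICMP_NE 0. Full and empty regions never reach that call because they already branched to trivially_true / trivially_false.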
@@ -8025,34 +8033,16 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
return false;
}
-namespace {
-/// RAII wrapper to prevent recursive application of isImpliedCond.
-/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
-/// currently evaluating isImpliedCond.
-struct MarkPendingLoopPredicate {
- Value *Cond;
- DenseSet<Value*> &LoopPreds;
- bool Pending;
-
- MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
- : Cond(C), LoopPreds(LP) {
- Pending = !LoopPreds.insert(Cond).second;
- }
- ~MarkPendingLoopPredicate() {
- if (!Pending)
- LoopPreds.erase(Cond);
- }
-};
-} // end anonymous namespace
-
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
Value *FoundCondValue,
bool Inverse) {
- MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
- if (Mark.Pending)
+ if (!PendingLoopPredicates.insert(FoundCondValue).second)
return false;
+ auto ClearOnExit =
+ make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });
+
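llvm::make_scope_exit (from llvm/ADT/ScopeExit.h) replaces the hand-written RAII struct: it runs a callable when the enclosing scope unwinds, on every return path alike. The general shape of the idiom, as a standalone sketch rather than code from this file:

  #include "llvm/ADT/DenseSet.h"
  #include "llvm/ADT/ScopeExit.h"

  static bool visit(llvm::DenseSet<llvm::Value *> &Pending, llvm::Value *Cond) {
    if (!Pending.insert(Cond).second)
      return false;                      // already in flight; don't recurse
    auto Cleanup = llvm::make_scope_exit([&] { Pending.erase(Cond); });
    // ... any early return below still erases Cond ...
    return true;
  }

Note the early return before creating the guard preserves the old Pending semantics: a re-entrant call must not erase an entry it did not insert.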
// Recursively handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
if (BO->getOpcode() == Instruction::And) {
@@ -8237,9 +8227,8 @@ bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
return true;
}
-bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
- const SCEV *More,
- APInt &C) {
+Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
+ const SCEV *Less) {
// We avoid subtracting expressions here because this function is usually
// fairly deep in the call stack (i.e. is called many times).
@@ -8248,15 +8237,15 @@ bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
const auto *MAR = cast<SCEVAddRecExpr>(More);
if (LAR->getLoop() != MAR->getLoop())
- return false;
+ return None;
// We look at affine expressions only; not for correctness but to keep
// getStepRecurrence cheap.
if (!LAR->isAffine() || !MAR->isAffine())
- return false;
+ return None;
if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
- return false;
+ return None;
Less = LAR->getStart();
More = MAR->getStart();
@@ -8267,27 +8256,22 @@ bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
const auto &M = cast<SCEVConstant>(More)->getAPInt();
const auto &L = cast<SCEVConstant>(Less)->getAPInt();
- C = M - L;
- return true;
+ return M - L;
}
const SCEV *L, *R;
SCEV::NoWrapFlags Flags;
if (splitBinaryAdd(Less, L, R, Flags))
if (const auto *LC = dyn_cast<SCEVConstant>(L))
- if (R == More) {
- C = -(LC->getAPInt());
- return true;
- }
+ if (R == More)
+ return -(LC->getAPInt());
if (splitBinaryAdd(More, L, R, Flags))
if (const auto *LC = dyn_cast<SCEVConstant>(L))
- if (R == Less) {
- C = LC->getAPInt();
- return true;
- }
+ if (R == Less)
+ return LC->getAPInt();
- return false;
+ return None;
}
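Semantics after the signature change: computeConstantDifference(More, Less) yields More - Less when the difference is syntactically visible, and None otherwise. Illustrative expectations, assuming canonical two-operand adds:

  // computeConstantDifference((5 + %x), %x)  ==> 5
  // computeConstantDifference(%x, (5 + %x))  ==> -5
  // computeConstantDifference(%x, %y)        ==> None

As the leading comment says, it deliberately avoids getMinusSCEV: it only pattern-matches adds and affine addrecs instead of building new expressions, because it sits deep in hot call paths.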
bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
@@ -8344,22 +8328,21 @@ bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
// neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
// C)".
- APInt LDiff, RDiff;
- if (!computeConstantDifference(FoundLHS, LHS, LDiff) ||
- !computeConstantDifference(FoundRHS, RHS, RDiff) ||
- LDiff != RDiff)
+ Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS);
+ Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS);
+ if (!LDiff || !RDiff || *LDiff != *RDiff)
return false;
- if (LDiff == 0)
+ if (LDiff->isMinValue())
return true;
APInt FoundRHSLimit;
if (Pred == CmpInst::ICMP_ULT) {
- FoundRHSLimit = -RDiff;
+ FoundRHSLimit = -(*RDiff);
} else {
assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
- FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff;
+ FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff;
}
// Try to prove (1) or (2), as needed.
@@ -8469,7 +8452,7 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
return
// min(A, ...) <= A
@@ -8479,7 +8462,7 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE:
return
// min(A, ...) <= A
@@ -8550,9 +8533,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
// reduce the compile time impact of this optimization.
return false;
- const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
- if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
- !isa<SCEVConstant>(AddLHS->getOperand(0)))
+ Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS);
+ if (!Addend)
return false;
APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
@@ -8562,10 +8544,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
ConstantRange FoundLHSRange =
ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
- // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
- // for `LHS`:
- APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt();
- ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
+ // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
+ ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));
// We can also compute the range of values for `LHS` that satisfy the
// consequent, "`LHS` `Pred` `RHS`":
@@ -8580,6 +8560,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned, bool NoWrap) {
+ assert(isKnownPositive(Stride) && "Positive stride expected!");
+
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
@@ -8642,17 +8624,21 @@ ScalarEvolution::ExitLimit
ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
- SCEVUnionPredicate P;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// We handle only IV < Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
- if (!IV && AllowPredicates)
+ bool PredicatedIV = false;
+
+ if (!IV && AllowPredicates) {
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
- IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+ IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
+ PredicatedIV = true;
+ }
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8663,61 +8649,144 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *Stride = IV->getStepRecurrence(*this);
- // Avoid negative or zero stride values
- if (!isKnownPositive(Stride))
- return getCouldNotCompute();
+ bool PositiveStride = isKnownPositive(Stride);
- // Avoid proven overflow cases: this will ensure that the backedge taken count
- // will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
- // behaviors like the case of C language.
- if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ // Avoid negative or zero stride values.
+ if (!PositiveStride) {
+ // We can compute the correct backedge taken count for loops with unknown
+ // strides if we can prove that the loop is not an infinite loop with side
+ // effects. Here's the loop structure we are trying to handle -
+ //
+ // i = start
+ // do {
+ // A[i] = i;
+ // i += s;
+ // } while (i < end);
+ //
+ // The backedge taken count for such loops is evaluated as -
+ // (max(end, start + stride) - start - 1) /u stride
+ //
+ // The additional preconditions that we need to check to prove correctness
+  // of the above formula are as follows -
+ //
+ // a) IV is either nuw or nsw depending upon signedness (indicated by the
+ // NoWrap flag).
+  // b) the loop is single-exit and has no side effects.
+  //
+ // Precondition a) implies that if the stride is negative, this is a single
+ // trip loop. The backedge taken count formula reduces to zero in this case.
+ //
+  // Precondition b) implies that the unknown stride cannot be zero; otherwise
+  // we would have UB.
+ //
+ // The positive stride case is the same as isKnownPositive(Stride) returning
+ // true (original behavior of the function).
+ //
+ // We want to make sure that the stride is truly unknown as there are edge
+ // cases where ScalarEvolution propagates no wrap flags to the
+ // post-increment/decrement IV even though the increment/decrement operation
+ // itself is wrapping. The computed backedge taken count may be wrong in
+ // such cases. This is prevented by checking that the stride is not known to
+ // be either positive or non-positive. For example, no wrap flags are
+ // propagated to the post-increment IV of this loop with a trip count of 2 -
+ //
+ // unsigned char i;
+ // for(i=127; i<128; i+=129)
+ // A[i] = i;
+ //
+ if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
+ !loopHasNoSideEffects(L))
+ return getCouldNotCompute();
+
+ } else if (!Stride->isOne() &&
+ doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ // Avoid proven overflow cases: this will ensure that the backedge taken
+ // count will not generate any unsigned overflow. Relaxed no-overflow
+    // conditions exploit NoWrapFlags, allowing us to optimize in the presence
+    // of undefined behavior, as in C.
return getCouldNotCompute();
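To see the unknown-stride formula in action (a worked sketch with made-up values): with start = 0, end = 10 and a runtime stride s = 3, the loop in the comment above stores to i = 0, 3, 6, 9 and takes the backedge three times, and the formula agrees:

      (max(end, start + stride) - start - 1) /u stride
    = (max(10, 3) - 0 - 1) /u 3
    = 9 /u 3
    = 3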
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT;
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ // If the backedge is taken at least once, then it will be taken
+ // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start
+ // is the LHS value of the less-than comparison the first time it is evaluated
+ // and End is the RHS.
+ const SCEV *BECountIfBackedgeTaken =
+ computeBECount(getMinusSCEV(End, Start), Stride, false);
+ // If the loop entry is guarded by the result of the backedge test of the
+ // first loop iteration, then we know the backedge will be taken at least
+ // once and so the backedge taken count is as above. If not then we use the
+  // expression (max(End,Start)-Start)/Stride to describe the backedge count:
+  // if the backedge is taken at least once, max(End,Start) is End and the
+  // result is as above; if not, max(End,Start) is Start, giving a backedge
+  // count of zero.
+ const SCEV *BECount;
+ if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ BECount = BECountIfBackedgeTaken;
+ else {
End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
+ BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+ }
- const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+ const SCEV *MaxBECount;
+ bool MaxOrZero = false;
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else if (isa<SCEVConstant>(BECountIfBackedgeTaken)) {
+ // If we know exactly how many times the backedge will be taken if it's
+ // taken at least once, then the backedge count will either be that or
+ // zero.
+ MaxBECount = BECountIfBackedgeTaken;
+ MaxOrZero = true;
+ } else {
+ // Calculate the maximum backedge count based on the range of values
+ // permitted by Start, End, and Stride.
+ APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+ : getUnsignedRange(Start).getUnsignedMin();
- APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
- : getUnsignedRange(Start).getUnsignedMin();
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
- APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
- : getUnsignedRange(Stride).getUnsignedMin();
+ APInt StrideForMaxBECount;
- unsigned BitWidth = getTypeSizeInBits(LHS->getType());
- APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
- : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+ if (PositiveStride)
+ StrideForMaxBECount =
+ IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+ else
+ // Using a stride of 1 is safe when computing max backedge taken count for
+ // a loop with unknown stride.
+ StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
- // Although End can be a MAX expression we estimate MaxEnd considering only
- // the case End = RHS. This is safe because in the other case (End - Start)
- // is zero, leading to a zero maximum backedge taken count.
- APInt MaxEnd =
- IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
- : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+ APInt Limit =
+ IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
+ : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
+
+ // Although End can be a MAX expression we estimate MaxEnd considering only
+ // the case End = RHS. This is safe because in the other case (End - Start)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MaxEnd =
+ IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+ : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
- const SCEV *MaxBECount;
- if (isa<SCEVConstant>(BECount))
- MaxBECount = BECount;
- else
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
- getConstant(MinStride), false);
+ getConstant(StrideForMaxBECount), false);
+ }
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, P);
+ return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
}
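One shape the new MaxOrZero flag is designed for (a sketch with made-up values): if the compared IV starts at some unknown n, steps by 4 and exits once it reaches n + 12, then BECountIfBackedgeTaken is the constant (12 + 4 - 1) /u 4 = 3, while BECount itself is not constant because n + 12 may wrap below n. The exit limit then reports MaxBECount = 3 with MaxOrZero set: the backedge is taken exactly three times or not at all, which the PrintLoopInfo change further down surfaces as "actual taken count either this or zero".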
ScalarEvolution::ExitLimit
ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
- SCEVUnionPredicate P;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
@@ -8727,7 +8796,7 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
- IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+ IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8787,7 +8856,7 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, P);
+ return ExitLimit(BECount, MaxBECount, false, Predicates);
}
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
@@ -8859,9 +8928,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
// Range.getUpper() is crossed.
SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
- const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
- // getNoWrapFlags(FlagNW)
- FlagAnyWrap);
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap);
// Next, solve the constructed addrec
if (auto Roots =
@@ -8905,38 +8972,15 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
return SE.getCouldNotCompute();
}
-namespace {
-struct FindUndefs {
- bool Found;
- FindUndefs() : Found(false) {}
-
- bool follow(const SCEV *S) {
- if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) {
- if (isa<UndefValue>(C->getValue()))
- Found = true;
- } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- if (isa<UndefValue>(C->getValue()))
- Found = true;
- }
-
- // Keep looking if we haven't found it yet.
- return !Found;
- }
- bool isDone() const {
- // Stop recursion if we have found an undef.
- return Found;
- }
-};
-}
-
// Return true when S contains at least an undef value.
-static inline bool
-containsUndefs(const SCEV *S) {
- FindUndefs F;
- SCEVTraversal<FindUndefs> ST(F);
- ST.visitAll(S);
-
- return F.Found;
+static inline bool containsUndefs(const SCEV *S) {
+ return SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *SU = dyn_cast<SCEVUnknown>(S))
+ return isa<UndefValue>(SU->getValue());
+ else if (const auto *SC = dyn_cast<SCEVConstant>(S))
+ return isa<UndefValue>(SC->getValue());
+ return false;
+ });
}
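The same SCEVExprContains(Root, Pred) helper replaces the remaining hand-rolled visitors later in this file; a minimal extra illustration in the same style (hypothetical query, same signature as used above):

    // Does S mention an add recurrence anywhere in its expression tree?
    bool HasAddRec =
        SCEVExprContains(S, [](const SCEV *X) { return isa<SCEVAddRecExpr>(X); });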
namespace {
@@ -8964,7 +9008,8 @@ struct SCEVCollectTerms {
: Terms(T) {}
bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
+ if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
+ isa<SCEVSignExtendExpr>(S)) {
if (!containsUndefs(S))
Terms.push_back(S);
@@ -9116,10 +9161,9 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
}
// Remove all SCEVConstants.
- Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) {
- return isa<SCEVConstant>(E);
- }),
- Terms.end());
+ Terms.erase(
+ remove_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }),
+ Terms.end());
if (Terms.size() > 0)
if (!findArrayDimensionsRec(SE, Terms, Sizes))
@@ -9129,40 +9173,11 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
return true;
}
-// Returns true when S contains at least a SCEVUnknown parameter.
-static inline bool
-containsParameters(const SCEV *S) {
- struct FindParameter {
- bool FoundParameter;
- FindParameter() : FoundParameter(false) {}
-
- bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S)) {
- FoundParameter = true;
- // Stop recursion: we found a parameter.
- return false;
- }
- // Keep looking.
- return true;
- }
- bool isDone() const {
- // Stop recursion if we have found a parameter.
- return FoundParameter;
- }
- };
-
- FindParameter F;
- SCEVTraversal<FindParameter> ST(F);
- ST.visitAll(S);
-
- return F.FoundParameter;
-}
// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
-static inline bool
-containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
for (const SCEV *T : Terms)
- if (containsParameters(T))
+ if (SCEVExprContains(T, isa<SCEVUnknown, const SCEV *>))
return true;
return false;
}
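Note that `isa<SCEVUnknown, const SCEV *>` above passes an explicit instantiation of `isa` as the predicate; it is equivalent to the spelled-out lambda form:

    SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); });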
@@ -9493,6 +9508,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
: F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
ValueExprMap(std::move(Arg.ValueExprMap)),
+ PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
@@ -9501,6 +9517,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
std::move(Arg.ConstantEvolutionLoopExitValue)),
ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
LoopDispositions(std::move(Arg.LoopDispositions)),
+ LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
BlockDispositions(std::move(Arg.BlockDispositions)),
UnsignedRanges(std::move(Arg.UnsignedRanges)),
SignedRanges(std::move(Arg.SignedRanges)),
@@ -9569,6 +9586,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ if (SE->isBackedgeTakenCountMaxOrZero(L))
+ OS << ", actual taken count either this or zero.";
} else {
OS << "Unpredictable max backedge-taken count. ";
}
@@ -9829,8 +9848,10 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
if (!DT.dominates(AR->getLoop()->getHeader(), BB))
return DoesNotDominateBlock;
+
+ // Fall through into SCEVNAryExpr handling.
+ LLVM_FALLTHROUGH;
}
- // FALL THROUGH into SCEVNAryExpr handling.
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
@@ -9883,24 +9904,7 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
}
bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
- // Search for a SCEV expression node within an expression tree.
- // Implements SCEVTraversal::Visitor.
- struct SCEVSearch {
- const SCEV *Node;
- bool IsFound;
-
- SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
-
- bool follow(const SCEV *S) {
- IsFound |= (S == Node);
- return !IsFound;
- }
- bool isDone() const { return IsFound; }
- };
-
- SCEVSearch Search(Op);
- visitAll(S, Search);
- return Search.IsFound;
+ return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
@@ -10008,10 +10012,10 @@ void ScalarEvolution::verify() const {
// TODO: Verify more things.
}
-char ScalarEvolutionAnalysis::PassID;
+AnalysisKey ScalarEvolutionAnalysis::Key;
ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -10019,7 +10023,7 @@ ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
}
PreservedAnalyses
-ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> &AM) {
+ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
@@ -10106,25 +10110,34 @@ namespace {
class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
- // Rewrites \p S in the context of a loop L and the predicate A.
- // If Assume is true, rewrite is free to add further predicates to A
- // such that the result will be an AddRecExpr.
+ /// Rewrites \p S in the context of a loop L and the SCEV predication
+ /// infrastructure.
+ ///
+ /// If \p Pred is non-null, the SCEV expression is rewritten to respect the
+ /// equivalences present in \p Pred.
+ ///
+ /// If \p NewPreds is non-null, rewrite is free to add further predicates to
+ /// \p NewPreds such that the result will be an AddRecExpr.
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
- SCEVUnionPredicate &A, bool Assume) {
- SCEVPredicateRewriter Rewriter(L, SE, A, Assume);
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
+ SCEVUnionPredicate *Pred) {
+ SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
return Rewriter.visit(S);
}
SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
- SCEVUnionPredicate &P, bool Assume)
- : SCEVRewriteVisitor(SE), P(P), L(L), Assume(Assume) {}
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
+ SCEVUnionPredicate *Pred)
+ : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- auto ExprPreds = P.getPredicatesForExpr(Expr);
- for (auto *Pred : ExprPreds)
- if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
- if (IPred->getLHS() == Expr)
- return IPred->getRHS();
+ if (Pred) {
+ auto ExprPreds = Pred->getPredicatesForExpr(Expr);
+ for (auto *Pred : ExprPreds)
+ if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
+ if (IPred->getLHS() == Expr)
+ return IPred->getRHS();
+ }
return Expr;
}
@@ -10165,32 +10178,31 @@ private:
bool addOverflowAssumption(const SCEVAddRecExpr *AR,
SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
auto *A = SE.getWrapPredicate(AR, AddedFlags);
- if (!Assume) {
+ if (!NewPreds) {
// Check if we've already made this assumption.
- if (P.implies(A))
- return true;
- return false;
+ return Pred && Pred->implies(A);
}
- P.add(A);
+ NewPreds->insert(A);
return true;
}
- SCEVUnionPredicate &P;
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
+ SCEVUnionPredicate *Pred;
const Loop *L;
- bool Assume;
};
} // end anonymous namespace
const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
SCEVUnionPredicate &Preds) {
- return SCEVPredicateRewriter::rewrite(S, L, *this, Preds, false);
+ return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
}
-const SCEVAddRecExpr *
-ScalarEvolution::convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L,
- SCEVUnionPredicate &Preds) {
- SCEVUnionPredicate TransformPreds;
- S = SCEVPredicateRewriter::rewrite(S, L, *this, TransformPreds, true);
+const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
+ const SCEV *S, const Loop *L,
+ SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
+
+ SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
+ S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
if (!AddRec)
@@ -10198,7 +10210,9 @@ ScalarEvolution::convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L,
// Since the transformation was successful, we can now transfer the SCEV
// predicates.
- Preds.add(&TransformPreds);
+ for (auto *P : TransformPreds)
+ Preds.insert(P);
+
return AddRec;
}
@@ -10351,7 +10365,7 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
return Entry.second;
// We found an entry but it's stale. Rewrite the stale entry
- // acording to the current predicate.
+ // according to the current predicate.
if (Entry.second)
Expr = Entry.second;
@@ -10425,11 +10439,15 @@ bool PredicatedScalarEvolution::hasNoOverflow(
const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
const SCEV *Expr = this->getSCEV(V);
- auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, Preds);
+ SmallPtrSet<const SCEVPredicate *, 4> NewPreds;
+ auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds);
if (!New)
return nullptr;
+ for (auto *P : NewPreds)
+ Preds.add(P);
+
updateGeneration();
RewriteMap[SE.getSCEV(V)] = {Generation, New};
return New;
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 61fb411d3150..7bea994121c8 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -110,9 +110,9 @@ Value *SCEVAAResult::GetBaseValue(const SCEV *S) {
return nullptr;
}
-char SCEVAA::PassID;
+AnalysisKey SCEVAA::Key;
-SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> &AM) {
+SCEVAAResult SCEVAA::run(Function &F, FunctionAnalysisManager &AM) {
return SCEVAAResult(AM.getResult<ScalarEvolutionAnalysis>(F));
}
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 2e45bb840946..d15a7dbd20e6 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -549,9 +549,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(V)) break;
- bool AnyIndexNotLoopInvariant =
- std::any_of(GepIndices.begin(), GepIndices.end(),
- [L](Value *Op) { return !L->isLoopInvariant(Op); });
+ bool AnyIndexNotLoopInvariant = any_of(
+ GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });
if (AnyIndexNotLoopInvariant)
break;
@@ -1183,11 +1182,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
PostIncLoopSet SavedPostIncLoops = PostIncLoops;
PostIncLoops.clear();
- // Expand code for the start value.
- Value *StartV =
- expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front());
+ // Expand code for the start value into the loop preheader.
+ assert(L->getLoopPreheader() &&
+ "Can't expand add recurrences without a loop preheader!");
+ Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+ L->getLoopPreheader()->getTerminator());
- // StartV must be hoisted into L's preheader to dominate the new phi.
+  // StartV must have been inserted into L's preheader to dominate the new
+ // phi.
assert(!isa<Instruction>(StartV) ||
SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
L->getHeader()));
@@ -1625,9 +1627,10 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
return V;
}
-Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
- const Instruction *InsertPt) {
- SetVector<Value *> *Set = SE.getSCEVValues(S);
+ScalarEvolution::ValueOffsetPair
+SCEVExpander::FindValueInExprValueMap(const SCEV *S,
+ const Instruction *InsertPt) {
+ SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
// If the expansion is not in CanonicalMode, and the SCEV contains any
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
if (CanonicalMode || !SE.containsAddRecurrence(S)) {
@@ -1636,21 +1639,21 @@ Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
// Choose a Value from the set which dominates the insertPt.
// insertPt should be inside the Value's parent loop so as not to break
// the LCSSA form.
- for (auto const &Ent : *Set) {
+ for (auto const &VOPair : *Set) {
+ Value *V = VOPair.first;
+ ConstantInt *Offset = VOPair.second;
Instruction *EntInst = nullptr;
- if (Ent && isa<Instruction>(Ent) &&
- (EntInst = cast<Instruction>(Ent)) &&
- S->getType() == Ent->getType() &&
+ if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
+ S->getType() == V->getType() &&
EntInst->getFunction() == InsertPt->getFunction() &&
SE.DT.dominates(EntInst, InsertPt) &&
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) {
- return Ent;
- }
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ return {V, Offset};
}
}
}
- return nullptr;
+ return {nullptr, nullptr};
}
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
@@ -1698,11 +1701,33 @@ Value *SCEVExpander::expand(const SCEV *S) {
Builder.SetInsertPoint(InsertPt);
// Expand the expression into instructions.
- Value *V = FindValueInExprValueMap(S, InsertPt);
+ ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
+ Value *V = VO.first;
if (!V)
V = visit(S);
-
+ else if (VO.second) {
+ if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
+ Type *Ety = Vty->getPointerElementType();
+ int64_t Offset = VO.second->getSExtValue();
+ int64_t ESize = SE.getTypeSizeInBits(Ety);
+ if ((Offset * 8) % ESize == 0) {
+ ConstantInt *Idx =
+ ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
+ V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
+ } else {
+ ConstantInt *Idx =
+ ConstantInt::getSigned(VO.second->getType(), -Offset);
+ unsigned AS = Vty->getAddressSpace();
+ V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
+ V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
+ "uglygep");
+ V = Builder.CreateBitCast(V, Vty);
+ }
+ } else {
+ V = Builder.CreateSub(V, VO.second);
+ }
+ }
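Concretely (illustrative values, not from the patch): if the cached pair is (V, Offset) with V of type i64* and Offset = 16 bytes, then ESize = 64, (16 * 8) % 64 == 0, and Idx = -(16 * 8) / 64 = -2, so the reuse path emits

    %scevgep = getelementptr i64, i64* %V, i64 -2

recovering S = V - 16 bytes from the cached expansion. An Offset of, say, 3 fails the divisibility test and takes the i8 "uglygep" path, which performs the same subtraction bytewise.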
// Remember the expanded value for this SCEV at this location.
//
// This is independent of PostIncLoops. The mapped value simply materializes
@@ -1887,8 +1912,18 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Value *SCEVExpander::findExistingExpansion(const SCEV *S,
- const Instruction *At, Loop *L) {
+Value *SCEVExpander::getExactExistingExpansion(const SCEV *S,
+ const Instruction *At, Loop *L) {
+ Optional<ScalarEvolution::ValueOffsetPair> VO =
+ getRelatedExistingExpansion(S, At, L);
+ if (VO && VO.getValue().second == nullptr)
+ return VO.getValue().first;
+ return nullptr;
+}
+
+Optional<ScalarEvolution::ValueOffsetPair>
+SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
+ Loop *L) {
using namespace llvm::PatternMatch;
SmallVector<BasicBlock *, 4> ExitingBlocks;
@@ -1906,31 +1941,32 @@ Value *SCEVExpander::findExistingExpansion(const SCEV *S,
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
- return LHS;
+ return ScalarEvolution::ValueOffsetPair(LHS, nullptr);
if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
- return RHS;
+ return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
}
// Use expand's logic which is used for reusing a previous Value in
// ExprValueMap.
- if (Value *Val = FindValueInExprValueMap(S, At))
- return Val;
+ ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
+ if (VO.first)
+ return VO;
// There is potential to make this significantly smarter, but this simple
// heuristic already gets some interesting cases.
// Can not find suitable value.
- return nullptr;
+ return None;
}
bool SCEVExpander::isHighCostExpansionHelper(
const SCEV *S, Loop *L, const Instruction *At,
SmallPtrSetImpl<const SCEV *> &Processed) {
- // If we can find an existing value for this scev avaliable at the point "At"
+ // If we can find an existing value for this scev available at the point "At"
// then consider the expression cheap.
- if (At && findExistingExpansion(S, At, L) != nullptr)
+ if (At && getRelatedExistingExpansion(S, At, L))
return false;
// Zero/One operand expressions
@@ -1978,7 +2014,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
// involving division. This is just a simple search heuristic.
if (!At)
At = &ExitingBB->back();
- if (!findExistingExpansion(
+ if (!getRelatedExistingExpansion(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
return true;
}
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index 82e65a1f2088..833c6e09f6fd 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -127,9 +127,8 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
return AAResultBase::getModRefInfo(CS1, CS2);
}
-void ScopedNoAliasAAResult::collectMDInDomain(
- const MDNode *List, const MDNode *Domain,
- SmallPtrSetImpl<const MDNode *> &Nodes) const {
+static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) {
for (const MDOperand &MDOp : List->operands())
if (const MDNode *MD = dyn_cast<MDNode>(MDOp))
if (AliasScopeNode(MD).getDomain() == Domain)
@@ -151,12 +150,14 @@ bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
// We alias unless, for some domain, the set of noalias scopes in that domain
// is a superset of the set of alias scopes in that domain.
for (const MDNode *Domain : Domains) {
- SmallPtrSet<const MDNode *, 16> NANodes, ScopeNodes;
- collectMDInDomain(NoAlias, Domain, NANodes);
+ SmallPtrSet<const MDNode *, 16> ScopeNodes;
collectMDInDomain(Scopes, Domain, ScopeNodes);
- if (!ScopeNodes.size())
+ if (ScopeNodes.empty())
continue;
+ SmallPtrSet<const MDNode *, 16> NANodes;
+ collectMDInDomain(NoAlias, Domain, NANodes);
+
// To not alias, all of the nodes in ScopeNodes must be in NANodes.
bool FoundAll = true;
for (const MDNode *SMD : ScopeNodes)
@@ -172,10 +173,10 @@ bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
return true;
}
-char ScopedNoAliasAA::PassID;
+AnalysisKey ScopedNoAliasAA::Key;
ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
return ScopedNoAliasAAResult();
}
diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h
index fd3a241d79c1..772df175b384 100644
--- a/lib/Analysis/StratifiedSets.h
+++ b/lib/Analysis/StratifiedSets.h
@@ -85,17 +85,8 @@ struct StratifiedLink {
template <typename T> class StratifiedSets {
public:
StratifiedSets() = default;
-
- // TODO: Figure out how to make MSVC not call the copy ctor here, and delete
- // it.
-
- // Can't default these due to compile errors in MSVC2013
- StratifiedSets(StratifiedSets &&Other) { *this = std::move(Other); }
- StratifiedSets &operator=(StratifiedSets &&Other) {
- Values = std::move(Other.Values);
- Links = std::move(Other.Links);
- return *this;
- }
+ StratifiedSets(StratifiedSets &&) = default;
+ StratifiedSets &operator=(StratifiedSets &&) = default;
StratifiedSets(DenseMap<T, StratifiedInfo> Map,
std::vector<StratifiedLink> Links)
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 93d537ad3abb..112118ab77eb 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -23,9 +23,10 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
"No vector functions library"),
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
"Accelerate framework"),
- clEnumValEnd));
+ clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
+ "Intel SVML library")));
-const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
#define TLI_DEFINE_STRING
#include "llvm/Analysis/TargetLibraryInfo.def"
};
@@ -52,14 +53,33 @@ static bool hasSinCosPiStret(const Triple &T) {
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
- ArrayRef<const char *> StandardNames) {
+ ArrayRef<StringRef> StandardNames) {
// Verify that the StandardNames array is in alphabetical order.
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
- [](const char *LHS, const char *RHS) {
- return strcmp(LHS, RHS) < 0;
+ [](StringRef LHS, StringRef RHS) {
+ return LHS < RHS;
}) &&
"TargetLibraryInfoImpl function names must be sorted");
+ bool ShouldExtI32Param = false, ShouldExtI32Return = false,
+ ShouldSignExtI32Param = false;
+ // PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and
+ // returns corresponding to C-level ints and unsigned ints.
+ if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le ||
+ T.getArch() == Triple::sparcv9 || T.getArch() == Triple::systemz) {
+ ShouldExtI32Param = true;
+ ShouldExtI32Return = true;
+ }
+ // Mips, on the other hand, needs signext on i32 parameters corresponding
+ // to both signed and unsigned ints.
+ if (T.getArch() == Triple::mips || T.getArch() == Triple::mipsel ||
+ T.getArch() == Triple::mips64 || T.getArch() == Triple::mips64el) {
+ ShouldSignExtI32Param = true;
+ }
+ TLI.setShouldExtI32Param(ShouldExtI32Param);
+ TLI.setShouldExtI32Return(ShouldExtI32Return);
+ TLI.setShouldSignExtI32Param(ShouldSignExtI32Param);
+
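A sketch of what these knobs change in emitted declarations (assumed IR, not part of this patch): with ShouldExtI32Param and ShouldExtI32Return set, as on powerpc64le, a library-call prototype built for "int ffs(int)" would carry sign-extension attributes on the C int parameter and return, roughly:

    declare signext i32 @ffs(i32 signext)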
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
TLI.setUnavailable(LibFunc::ldexp);
@@ -322,6 +342,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// on Linux.
//
// Fall through to disable all of them.
+ LLVM_FALLTHROUGH;
default:
TLI.setUnavailable(LibFunc::exp10);
TLI.setUnavailable(LibFunc::exp10f);
@@ -429,14 +450,19 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) {
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI)
- : CustomNames(TLI.CustomNames) {
+ : CustomNames(TLI.CustomNames), ShouldExtI32Param(TLI.ShouldExtI32Param),
+ ShouldExtI32Return(TLI.ShouldExtI32Return),
+ ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) {
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
VectorDescs = TLI.VectorDescs;
ScalarDescs = TLI.ScalarDescs;
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
- : CustomNames(std::move(TLI.CustomNames)) {
+ : CustomNames(std::move(TLI.CustomNames)),
+ ShouldExtI32Param(TLI.ShouldExtI32Param),
+ ShouldExtI32Return(TLI.ShouldExtI32Return),
+ ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) {
std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
AvailableArray);
VectorDescs = TLI.VectorDescs;
@@ -445,12 +471,18 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) {
CustomNames = TLI.CustomNames;
+ ShouldExtI32Param = TLI.ShouldExtI32Param;
+ ShouldExtI32Return = TLI.ShouldExtI32Return;
+ ShouldSignExtI32Param = TLI.ShouldSignExtI32Param;
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
return *this;
}
TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) {
CustomNames = std::move(TLI.CustomNames);
+ ShouldExtI32Param = TLI.ShouldExtI32Param;
+ ShouldExtI32Return = TLI.ShouldExtI32Return;
+ ShouldSignExtI32Param = TLI.ShouldSignExtI32Param;
std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
AvailableArray);
return *this;
@@ -469,16 +501,16 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
LibFunc::Func &F) const {
- const char *const *Start = &StandardNames[0];
- const char *const *End = &StandardNames[LibFunc::NumLibFuncs];
+ StringRef const *Start = &StandardNames[0];
+ StringRef const *End = &StandardNames[LibFunc::NumLibFuncs];
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())
return false;
- const char *const *I = std::lower_bound(
- Start, End, funcName, [](const char *LHS, StringRef RHS) {
- return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ StringRef const *I = std::lower_bound(
+ Start, End, funcName, [](StringRef LHS, StringRef RHS) {
+ return LHS < RHS;
});
if (I != End && *I == funcName) {
F = (LibFunc::Func)(I - Start);
@@ -535,7 +567,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::strcpy:
case LibFunc::stpcpy:
return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(0) &&
@@ -547,7 +579,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::strncpy:
case LibFunc::stpncpy:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
@@ -640,8 +672,9 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::memcpy:
+ case LibFunc::mempcpy:
case LibFunc::memmove:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
@@ -652,7 +685,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::memset:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
@@ -843,10 +876,10 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc::stat64:
case LibFunc::lstat64:
case LibFunc::statvfs64:
- return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy() &&
+ return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc::dunder_isoc99_sscanf:
- return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy() &&
+ return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc::fopen64:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
@@ -953,15 +986,18 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc::ffs:
case LibFunc::ffsl:
case LibFunc::ffsll:
+ case LibFunc::fls:
+ case LibFunc::flsl:
+ case LibFunc::flsll:
+ return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getParamType(0)->isIntegerTy());
+
case LibFunc::isdigit:
case LibFunc::isascii:
case LibFunc::toascii:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
- FTy.getParamType(0)->isIntegerTy());
+ FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::fls:
- case LibFunc::flsl:
- case LibFunc::flsll:
case LibFunc::abs:
case LibFunc::labs:
case LibFunc::llabs:
@@ -1004,21 +1040,19 @@ void TargetLibraryInfoImpl::disableAllFunctions() {
}
static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
- return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName,
- std::strlen(RHS.ScalarFnName)) < 0;
+ return LHS.ScalarFnName < RHS.ScalarFnName;
}
static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) {
- return std::strncmp(LHS.VectorFnName, RHS.VectorFnName,
- std::strlen(RHS.VectorFnName)) < 0;
+ return LHS.VectorFnName < RHS.VectorFnName;
}
static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) {
- return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0;
+ return LHS.ScalarFnName < S;
}
static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
- return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0;
+ return LHS.VectorFnName < S;
}
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
@@ -1074,6 +1108,75 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
addVectorizableFunctions(VecFuncs);
break;
}
+ case SVML: {
+ const VecDesc VecFuncs[] = {
+ {"sin", "__svml_sin2", 2},
+ {"sin", "__svml_sin4", 4},
+ {"sin", "__svml_sin8", 8},
+
+ {"sinf", "__svml_sinf4", 4},
+ {"sinf", "__svml_sinf8", 8},
+ {"sinf", "__svml_sinf16", 16},
+
+ {"cos", "__svml_cos2", 2},
+ {"cos", "__svml_cos4", 4},
+ {"cos", "__svml_cos8", 8},
+
+ {"cosf", "__svml_cosf4", 4},
+ {"cosf", "__svml_cosf8", 8},
+ {"cosf", "__svml_cosf16", 16},
+
+ {"pow", "__svml_pow2", 2},
+ {"pow", "__svml_pow4", 4},
+ {"pow", "__svml_pow8", 8},
+
+ {"powf", "__svml_powf4", 4},
+ {"powf", "__svml_powf8", 8},
+ {"powf", "__svml_powf16", 16},
+
+ {"llvm.pow.f64", "__svml_pow2", 2},
+ {"llvm.pow.f64", "__svml_pow4", 4},
+ {"llvm.pow.f64", "__svml_pow8", 8},
+
+ {"llvm.pow.f32", "__svml_powf4", 4},
+ {"llvm.pow.f32", "__svml_powf8", 8},
+ {"llvm.pow.f32", "__svml_powf16", 16},
+
+ {"exp", "__svml_exp2", 2},
+ {"exp", "__svml_exp4", 4},
+ {"exp", "__svml_exp8", 8},
+
+ {"expf", "__svml_expf4", 4},
+ {"expf", "__svml_expf8", 8},
+ {"expf", "__svml_expf16", 16},
+
+ {"llvm.exp.f64", "__svml_exp2", 2},
+ {"llvm.exp.f64", "__svml_exp4", 4},
+ {"llvm.exp.f64", "__svml_exp8", 8},
+
+ {"llvm.exp.f32", "__svml_expf4", 4},
+ {"llvm.exp.f32", "__svml_expf8", 8},
+ {"llvm.exp.f32", "__svml_expf16", 16},
+
+ {"log", "__svml_log2", 2},
+ {"log", "__svml_log4", 4},
+ {"log", "__svml_log8", 8},
+
+ {"logf", "__svml_logf4", 4},
+ {"logf", "__svml_logf8", 8},
+ {"logf", "__svml_logf16", 16},
+
+ {"llvm.log.f64", "__svml_log2", 2},
+ {"llvm.log.f64", "__svml_log4", 4},
+ {"llvm.log.f64", "__svml_log8", 8},
+
+ {"llvm.log.f32", "__svml_logf4", 4},
+ {"llvm.log.f32", "__svml_logf8", 8},
+ {"llvm.log.f32", "__svml_logf16", 16},
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
case NoLibrary:
break;
}
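As a usage sketch against the table above (hypothetical call site; the lookup helpers already exist on TargetLibraryInfo):

    // With the SVML vector library selected, a vectorizer can ask:
    if (TLI.isFunctionVectorizable("sin", /*VF=*/4)) {
      // Resolves to "__svml_sin4" per the table above.
      StringRef VecName = TLI.getVectorizedFunction("sin", /*VF=*/4);
      (void)VecName;
    }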
@@ -1162,7 +1265,7 @@ TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
-char TargetLibraryAnalysis::PassID;
+AnalysisKey TargetLibraryAnalysis::Key;
// Register the basic pass.
INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo",
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 52013f796c56..2a15b9b264e3 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -150,6 +150,11 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return Cost;
}
+bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I,
+ int64_t Offset) const {
+ return TTIImpl->isFoldableMemAccessOffset(I, Offset);
+}
+
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
return TTIImpl->isTruncateFree(Ty1, Ty2);
}
@@ -173,6 +178,9 @@ unsigned TargetTransformInfo::getJumpBufSize() const {
bool TargetTransformInfo::shouldBuildLookupTables() const {
return TTIImpl->shouldBuildLookupTables();
}
+bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
+ return TTIImpl->shouldBuildLookupTablesForConstant(C);
+}
bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
@@ -186,11 +194,12 @@ bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
-bool TargetTransformInfo::allowsMisalignedMemoryAccesses(unsigned BitWidth,
+bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth,
unsigned AddressSpace,
unsigned Alignment,
bool *Fast) const {
- return TTIImpl->allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+ return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}
@@ -245,10 +254,6 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return TTIImpl->getRegisterBitWidth(Vector);
}
-unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
- return TTIImpl->getLoadStoreVecRegBitWidth(AS);
-}
-
unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}
@@ -417,6 +422,44 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
+unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
+ return TTIImpl->getLoadStoreVecRegBitWidth(AS);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
+ return TTIImpl->isLegalToVectorizeLoad(LI);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
+ return TTIImpl->isLegalToVectorizeStore(SI);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeLoadChain(
+ unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
+ return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
+ AddrSpace);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeStoreChain(
+ unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
+ return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
+ AddrSpace);
+}
+
+unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
+ unsigned LoadSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
+}
+
+unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
+ unsigned StoreSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
+}
+
TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
@@ -426,11 +469,11 @@ TargetIRAnalysis::TargetIRAnalysis(
: TTICallback(std::move(TTICallback)) {}
TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
- AnalysisManager<Function> &) {
+ FunctionAnalysisManager &) {
return TTICallback(F);
}
-char TargetIRAnalysis::PassID;
+AnalysisKey TargetIRAnalysis::Key;
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
return Result(F.getParent()->getDataLayout());
@@ -457,7 +500,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
}
TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
- AnalysisManager<Function> DummyFAM;
+ FunctionAnalysisManager DummyFAM;
TTI = TIRA.run(F, DummyFAM);
return *TTI;
}
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 20d162a03c30..e920c4c4e6b2 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -135,34 +135,35 @@ using namespace llvm;
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
namespace {
-/// TBAANode - This is a simple wrapper around an MDNode which provides a
-/// higher-level interface by hiding the details of how alias analysis
-/// information is encoded in its operands.
-class TBAANode {
- const MDNode *Node;
+/// This is a simple wrapper around an MDNode which provides a higher-level
+/// interface by hiding the details of how alias analysis information is encoded
+/// in its operands.
+template<typename MDNodeTy>
+class TBAANodeImpl {
+ MDNodeTy *Node;
public:
- TBAANode() : Node(nullptr) {}
- explicit TBAANode(const MDNode *N) : Node(N) {}
+ TBAANodeImpl() : Node(nullptr) {}
+ explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {}
/// getNode - Get the MDNode for this TBAANode.
- const MDNode *getNode() const { return Node; }
+ MDNodeTy *getNode() const { return Node; }
/// getParent - Get this TBAANode's Alias tree parent.
- TBAANode getParent() const {
+ TBAANodeImpl<MDNodeTy> getParent() const {
if (Node->getNumOperands() < 2)
- return TBAANode();
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ return TBAANodeImpl<MDNodeTy>();
+ MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1));
if (!P)
- return TBAANode();
+ return TBAANodeImpl<MDNodeTy>();
// Ok, this node has a valid parent. Return it.
- return TBAANode(P);
+ return TBAANodeImpl<MDNodeTy>(P);
}
- /// TypeIsImmutable - Test if this TBAANode represents a type for objects
- /// which are not modified (by any means) in the context where this
+ /// Test if this TBAANode represents a type for objects which are
+ /// not modified (by any means) in the context where this
/// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
+ bool isTypeImmutable() const {
if (Node->getNumOperands() < 3)
return false;
ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
@@ -172,32 +173,40 @@ public:
}
};
+/// \name Specializations of \c TBAANodeImpl for const and non const qualified
+/// \c MDNode.
+/// @{
+typedef TBAANodeImpl<const MDNode> TBAANode;
+typedef TBAANodeImpl<MDNode> MutableTBAANode;
+/// @}
+
/// This is a simple wrapper around an MDNode which provides a
/// higher-level interface by hiding the details of how alias analysis
/// information is encoded in its operands.
-class TBAAStructTagNode {
+template<typename MDNodeTy>
+class TBAAStructTagNodeImpl {
/// This node should be created with createTBAAStructTagNode.
- const MDNode *Node;
+ MDNodeTy *Node;
public:
- explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+ explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {}
/// Get the MDNode for this TBAAStructTagNode.
- const MDNode *getNode() const { return Node; }
+ MDNodeTy *getNode() const { return Node; }
- const MDNode *getBaseType() const {
+ MDNodeTy *getBaseType() const {
return dyn_cast_or_null<MDNode>(Node->getOperand(0));
}
- const MDNode *getAccessType() const {
+ MDNodeTy *getAccessType() const {
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
uint64_t getOffset() const {
return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
}
- /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
- /// objects which are not modified (by any means) in the context where this
+ /// Test if this TBAAStructTagNode represents a type for objects
+ /// which are not modified (by any means) in the context where this
/// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
+ bool isTypeImmutable() const {
if (Node->getNumOperands() < 4)
return false;
ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
@@ -207,6 +216,13 @@ public:
}
};
+/// \name Specializations of \c TBAAStructTagNodeImpl for const and non const
+/// qualified \c MDNodes.
+/// @{
+typedef TBAAStructTagNodeImpl<const MDNode> TBAAStructTagNode;
+typedef TBAAStructTagNodeImpl<MDNode> MutableTBAAStructTagNode;
+/// @}
+
/// This is a simple wrapper around an MDNode which provides a
/// higher-level interface by hiding the details of how alias analysis
/// information is encoded in its operands.
@@ -311,8 +327,8 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
- (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
return true;
return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
@@ -328,8 +344,8 @@ TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
- (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
Min = FMRB_OnlyReadsMemory;
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
@@ -401,34 +417,31 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return A;
// For struct-path aware TBAA, we use the access type of the tag.
- bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
- if (StructPath) {
- A = cast_or_null<MDNode>(A->getOperand(1));
- if (!A)
- return nullptr;
- B = cast_or_null<MDNode>(B->getOperand(1));
- if (!B)
- return nullptr;
- }
+ assert(isStructPathTBAA(A) && isStructPathTBAA(B) &&
+ "Auto upgrade should have taken care of this!");
+ A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType());
+ if (!A)
+ return nullptr;
+ B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType());
+ if (!B)
+ return nullptr;
SmallSetVector<MDNode *, 4> PathA;
- MDNode *T = A;
- while (T) {
- if (PathA.count(T))
+ MutableTBAANode TA(A);
+ while (TA.getNode()) {
+ if (PathA.count(TA.getNode()))
report_fatal_error("Cycle found in TBAA metadata.");
- PathA.insert(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
- : nullptr;
+ PathA.insert(TA.getNode());
+ TA = TA.getParent();
}
SmallSetVector<MDNode *, 4> PathB;
- T = B;
- while (T) {
- if (PathB.count(T))
+ MutableTBAANode TB(B);
+ while (TB.getNode()) {
+ if (PathB.count(TB.getNode()))
report_fatal_error("Cycle found in TBAA metadata.");
- PathB.insert(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
- : nullptr;
+ PathB.insert(TB.getNode());
+ TB = TB.getParent();
}
int IA = PathA.size() - 1;
@@ -443,11 +456,13 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
--IA;
--IB;
}
- if (!StructPath)
- return Ret;
- if (!Ret)
+ // We either did not find a match, or the only common base "type" is
+ // the root node. In either case, we don't have any useful TBAA
+ // metadata to attach.
+ if (!Ret || Ret->getNumOperands() < 2)
return nullptr;
+
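For a concrete feel (typical clang-emitted metadata, not from this patch): access tags for int and long both descend from the "omnipotent char" type node under the "Simple C/C++ TBAA" root, so the two walks meet at "omnipotent char", which has a parent operand and thus passes the check above, and a tag for it is built below. Two types that meet only at the root node fail the operand-count check, and the merged access gets no TBAA metadata at all.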
// We need to convert from a type node to a tag node.
Type *Int64 = IntegerType::get(A->getContext(), 64);
Metadata *Ops[3] = {Ret, Ret,
@@ -478,52 +493,8 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
/// Aliases - Test whether the type represented by A may alias the
/// type represented by B.
bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
- // Make sure that both MDNodes are struct-path aware.
- if (isStructPathTBAA(A) && isStructPathTBAA(B))
- return PathAliases(A, B);
-
- // Keep track of the root node for A and B.
- TBAANode RootA, RootB;
-
- // Climb the tree from A to see if we reach B.
- for (TBAANode T(A);;) {
- if (T.getNode() == B)
- // B is an ancestor of A.
- return true;
-
- RootA = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Climb the tree from B to see if we reach A.
- for (TBAANode T(B);;) {
- if (T.getNode() == A)
- // A is an ancestor of B.
- return true;
-
- RootB = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Neither node is an ancestor of the other.
-
- // If they have different roots, they're part of different potentially
- // unrelated type systems, so we must be conservative.
- if (RootA.getNode() != RootB.getNode())
- return true;
-
- // If they have the same root, then we've proved there's no alias.
- return false;
-}
-
-/// Test whether the struct-path tag represented by A may alias the
-/// struct-path tag represented by B.
-bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const {
- // Verify that both input nodes are struct-path aware.
+ // Verify that both input nodes are struct-path aware. Auto-upgrade should
+ // have taken care of this.
assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
@@ -583,9 +554,9 @@ bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const {
return false;
}
-char TypeBasedAA::PassID;
+AnalysisKey TypeBasedAA::Key;
-TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> &AM) {
+TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) {
return TypeBasedAAResult();
}
diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp
index 31e2b42075d6..f56754167360 100644
--- a/lib/Analysis/TypeMetadataUtils.cpp
+++ b/lib/Analysis/TypeMetadataUtils.cpp
@@ -69,8 +69,7 @@ void llvm::findDevirtualizableCallsForTypeTest(
// Find llvm.assume intrinsics for this llvm.type.test call.
for (const Use &CIU : CI->uses()) {
- auto AssumeCI = dyn_cast<CallInst>(CIU.getUser());
- if (AssumeCI) {
+ if (auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser())) {
Function *F = AssumeCI->getCalledFunction();
if (F && F->getIntrinsicID() == Intrinsic::assume)
Assumes.push_back(AssumeCI);
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index f2b40787443a..2a77baec6c36 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -51,6 +51,12 @@ const unsigned MaxDepth = 6;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
+// This optimization is known to cause performance regressions in some cases,
+// so keep it under a temporary flag for now.
+static cl::opt<bool>
+DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits",
+ cl::Hidden, cl::init(true));
+
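Since the flag defaults to true (the improvement stays off), a hypothetical invocation that exercises it might look like:

    opt -dont-improve-non-negative-phi-bits=false -instcombine -S input.ll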
/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
/// 0). For vector types, returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -80,7 +86,7 @@ struct Query {
/// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and
/// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so
/// on.
- std::array<const Value*, MaxDepth> Excluded;
+ std::array<const Value *, MaxDepth> Excluded;
unsigned NumExcluded;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
@@ -119,10 +125,10 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
return nullptr;
}
-static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned Depth, const Query &Q);
-void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
@@ -130,7 +136,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
+bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
+ const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
assert(LHS->getType() == RHS->getType() &&
@@ -145,10 +152,10 @@ bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
}
-static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
unsigned Depth, const Query &Q);
-void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
@@ -156,10 +163,11 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q);
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
+bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
+ bool OrZero,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
@@ -167,15 +175,16 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q);
+static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
-bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
+ unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
bool NonNegative, Negative;
@@ -183,7 +192,7 @@ bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
return NonNegative;
}
-bool llvm::isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
if (auto *CI = dyn_cast<ConstantInt>(V))
@@ -195,7 +204,7 @@ bool llvm::isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth,
isKnownNonZero(V, DL, Depth, AC, CxtI, DT);
}
-bool llvm::isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
bool NonNegative, Negative;
@@ -203,41 +212,45 @@ bool llvm::isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth,
return Negative;
}
-static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q);
+static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
-bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
+ const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
return ::isKnownNonEqual(V1, V2, Query(DL, AC,
safeCxtI(V1, safeCxtI(V2, CxtI)),
DT));
}
-static bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth,
+static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q);
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
+ const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT) {
return ::MaskedValueIsZero(V, Mask, Depth,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q);
+static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
+ const Query &Q);
-unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL,
+unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
+ bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
unsigned Depth, const Query &Q) {
if (!Add) {
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
+ if (const ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
      // We know that the top bits of C-X are clear if X contains fewer bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
// positive if we can prove that X is >= 0 and < 16.
@@ -311,7 +324,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
}
}
-static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
+static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
unsigned Depth, const Query &Q) {
@@ -398,7 +411,7 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
}
}
-static bool isEphemeralValueOf(Instruction *I, const Value *E) {
+static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
SmallVector<const Value *, 16> WorkSet(1, I);
SmallPtrSet<const Value *, 32> Visited;
SmallPtrSet<const Value *, 16> EphValues;
@@ -406,7 +419,7 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {
// The instruction defining an assumption's condition itself is always
// considered ephemeral to that assumption (even if it has other
// non-ephemeral users). See r246696's test case for an example.
- if (std::find(I->op_begin(), I->op_end(), E) != I->op_end())
+ if (is_contained(I->operands(), E))
return true;
while (!WorkSet.empty()) {
@@ -415,8 +428,7 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {
continue;
// If all uses of this value are ephemeral, then so is this value.
- if (std::all_of(V->user_begin(), V->user_end(),
- [&](const User *U) { return EphValues.count(U); })) {
+ if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) {
if (V == E)
return true;
@@ -456,9 +468,9 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) {
return false;
}
-static bool isValidAssumeForContext(Value *V, const Instruction *CxtI,
- const DominatorTree *DT) {
- Instruction *Inv = cast<Instruction>(V);
+bool llvm::isValidAssumeForContext(const Instruction *Inv,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
// There are two restrictions on the use of an assume:
// 1. The assume must dominate the context (or the control flow must
@@ -469,54 +481,42 @@ static bool isValidAssumeForContext(Value *V, const Instruction *CxtI,
// the assume).
if (DT) {
- if (DT->dominates(Inv, CxtI)) {
+ if (DT->dominates(Inv, CxtI))
return true;
- } else if (Inv->getParent() == CxtI->getParent()) {
- // The context comes first, but they're both in the same block. Make sure
- // there is nothing in between that might interrupt the control flow.
- for (BasicBlock::const_iterator I =
- std::next(BasicBlock::const_iterator(CxtI)),
- IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
- return false;
-
- return !isEphemeralValueOf(Inv, CxtI);
- }
+ } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
+ // We don't have a DT, but this trivially dominates.
+ return true;
+ }
+ // With or without a DT, the only remaining case we will check is if the
+ // instructions are in the same BB. Give up if that is not the case.
+ if (Inv->getParent() != CxtI->getParent())
return false;
- }
- // When we don't have a DT, we do a limited search...
- if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
- return true;
- } else if (Inv->getParent() == CxtI->getParent()) {
+  // If we have a dom tree, then we now know that the assume doesn't dominate
+ // the other instruction. If we don't have a dom tree then we can check if
+ // the assume is first in the BB.
+ if (!DT) {
// Search forward from the assume until we reach the context (or the end
// of the block); the common case is that the assume will come first.
- for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
+ for (auto I = std::next(BasicBlock::const_iterator(Inv)),
IE = Inv->getParent()->end(); I != IE; ++I)
if (&*I == CxtI)
return true;
-
- // The context must come first...
- for (BasicBlock::const_iterator I =
- std::next(BasicBlock::const_iterator(CxtI)),
- IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
- return false;
-
- return !isEphemeralValueOf(Inv, CxtI);
}
- return false;
-}
+ // The context comes first, but they're both in the same block. Make sure
+ // there is nothing in between that might interrupt the control flow.
+ for (BasicBlock::const_iterator I =
+ std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
+ I != IE; ++I)
+ if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
+ return false;
-bool llvm::isValidAssumeForContext(const Instruction *I,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isValidAssumeForContext(const_cast<Instruction *>(I), CxtI, DT);
+ return !isEphemeralValueOf(Inv, CxtI);
}
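
The rewritten check above splits into three cases: dominance (directly, or trivially via a single-predecessor block), a forward scan when no dominator tree is available, and a gap scan when the context precedes the assume in the same block. A rough standalone model of the same-block half, with a hypothetical Inst type standing in for LLVM instructions (the ephemeral-value check is omitted):

#include <cassert>
#include <vector>

// Stand-in for an instruction: only the one property the scan cares about.
struct Inst {
  bool SafeToSpeculate; // Models isSafeToSpeculativelyExecute(&I).
};

// Same-block half of the check: if the assume comes first, it is valid; if
// the context comes first, nothing between them may interrupt control flow.
static bool validInSameBlock(const std::vector<Inst> &BB, unsigned AssumeIdx,
                             unsigned CtxIdx) {
  if (AssumeIdx <= CtxIdx)
    return true;
  for (unsigned I = CtxIdx + 1; I < AssumeIdx; ++I)
    if (!BB[I].SafeToSpeculate)
      return false;
  return true;
}

int main() {
  std::vector<Inst> BB = {{true}, {false}, {true}}; // BB[1] may throw, say.
  assert(validInSameBlock(BB, 0, 2));  // Assume before context: fine.
  assert(!validInSameBlock(BB, 2, 0)); // Unsafe instruction in between.
  return 0;
}
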
-static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
+static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
@@ -788,11 +788,11 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// shift amount, compute the implied known-zero or known-one bits of the shift
// operator's result respectively for that shift amount. The results from calling
// KZF and KOF are conservatively combined for all permitted shift amounts.
-template <typename KZFunctor, typename KOFunctor>
-static void computeKnownBitsFromShiftOperator(Operator *I,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2,
- unsigned Depth, const Query &Q, KZFunctor KZF, KOFunctor KOF) {
+static void computeKnownBitsFromShiftOperator(
+ const Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2,
+ APInt &KnownOne2, unsigned Depth, const Query &Q,
+ function_ref<APInt(const APInt &, unsigned)> KZF,
+ function_ref<APInt(const APInt &, unsigned)> KOF) {
unsigned BitWidth = KnownZero.getBitWidth();
if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -801,6 +801,14 @@ static void computeKnownBitsFromShiftOperator(Operator *I,
computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
KnownZero = KZF(KnownZero, ShiftAmt);
KnownOne = KOF(KnownOne, ShiftAmt);
+    // If there is a conflict between KnownZero and KnownOne, this must be an
+    // overflowing left shift, so the shift result is undefined. Clear the
+    // KnownZero and KnownOne bits so that other code can propagate this undef.
+ if ((KnownZero & KnownOne) != 0) {
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ }
+
return;
}
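
The conflict can only appear once the callbacks inject extra facts, such as the nsw sign-bit handling added further down for shl. A self-checking sketch with plain 8-bit integers in place of APInt (the known-bit masks are made-up values):

#include <cassert>
#include <cstdint>

int main() {
  // 8-bit first operand: sign bit known zero, bit 6 known one.
  uint8_t KnownZero = 0x80, KnownOne = 0x40;
  const unsigned ShiftAmt = 1;
  const bool NSW = true;

  // Shift the known bits the way the shl callbacks do.
  uint8_t KZ = (uint8_t)((KnownZero << ShiftAmt) | ((1u << ShiftAmt) - 1));
  uint8_t KO = (uint8_t)(KnownOne << ShiftAmt);
  if (NSW && (KnownZero & 0x80)) // nsw: the result keeps the operand's sign.
    KZ |= 0x80;

  // Bit 7 is now claimed both zero and one, so this nsw shl must overflow;
  // treat the result as undefined and clear both sets.
  assert((KZ & KO) != 0);
  KZ = 0;
  KO = 0;
  return 0;
}
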
@@ -866,7 +874,7 @@ static void computeKnownBitsFromShiftOperator(Operator *I,
}
}
-static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
+static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
const Query &Q) {
unsigned BitWidth = KnownZero.getBitWidth();
@@ -950,14 +958,64 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
break;
}
- case Instruction::Select:
+ case Instruction::Select: {
computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
+ const Value *LHS;
+ const Value *RHS;
+ SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
+ if (SelectPatternResult::isMinOrMax(SPF)) {
+ computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q);
+ } else {
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
+ }
+
+ unsigned MaxHighOnes = 0;
+ unsigned MaxHighZeros = 0;
+ if (SPF == SPF_SMAX) {
+ // If both sides are negative, the result is negative.
+ if (KnownOne[BitWidth - 1] && KnownOne2[BitWidth - 1])
+ // We can derive a lower bound on the result by taking the max of the
+ // leading one bits.
+ MaxHighOnes =
+ std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ // If either side is non-negative, the result is non-negative.
+ else if (KnownZero[BitWidth - 1] || KnownZero2[BitWidth - 1])
+ MaxHighZeros = 1;
+ } else if (SPF == SPF_SMIN) {
+ // If both sides are non-negative, the result is non-negative.
+ if (KnownZero[BitWidth - 1] && KnownZero2[BitWidth - 1])
+ // We can derive an upper bound on the result by taking the max of the
+ // leading zero bits.
+ MaxHighZeros = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ // If either side is negative, the result is negative.
+ else if (KnownOne[BitWidth - 1] || KnownOne2[BitWidth - 1])
+ MaxHighOnes = 1;
+ } else if (SPF == SPF_UMAX) {
+ // We can derive a lower bound on the result by taking the max of the
+ // leading one bits.
+ MaxHighOnes =
+ std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ } else if (SPF == SPF_UMIN) {
+ // We can derive an upper bound on the result by taking the max of the
+ // leading zero bits.
+ MaxHighZeros =
+ std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes());
+ }
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
+ if (MaxHighOnes > 0)
+ KnownOne |= APInt::getHighBitsSet(BitWidth, MaxHighOnes);
+ if (MaxHighZeros > 0)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, MaxHighZeros);
break;
+ }
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::FPToUI:
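
The four bounds above follow from min/max keeping the "better" operand. A brute-force confirmation of the SMAX rule for i8, using plain integers rather than APInt (the known-one masks are made-up values; two's complement assumed):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

static unsigned leadingOnes(uint8_t V) {
  unsigned N = 0;
  for (int B = 7; B >= 0 && ((V >> B) & 1); --B)
    ++N;
  return N;
}

int main() {
  // Two smax operands, each with some leading bits known one (so both are
  // known negative).
  const uint8_t KnownOneA = 0xE0; // Top 3 bits known one.
  const uint8_t KnownOneB = 0xC0; // Top 2 bits known one.
  unsigned MaxHighOnes =
      std::max(leadingOnes(KnownOneA), leadingOnes(KnownOneB)); // == 3

  // Exhaustively confirm: every value pair consistent with those known bits
  // yields an smax with at least MaxHighOnes leading ones.
  for (unsigned A = 0; A < 256; ++A) {
    if ((A & KnownOneA) != KnownOneA)
      continue;
    for (unsigned B = 0; B < 256; ++B) {
      if ((B & KnownOneB) != KnownOneB)
        continue;
      uint8_t M = (uint8_t)std::max((int8_t)A, (int8_t)B);
      assert(leadingOnes(M) >= MaxHighOnes);
    }
  }
  printf("smax keeps at least %u leading ones\n", MaxHighOnes);
  return 0;
}
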
@@ -967,8 +1025,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
break; // Can't work with floating point.
case Instruction::PtrToInt:
case Instruction::IntToPtr:
- case Instruction::AddrSpaceCast: // Pointers could be different sizes.
- // FALL THROUGH and handle them the same as zext/trunc.
+ // Fall through and handle them the same as zext/trunc.
+ LLVM_FALLTHROUGH;
case Instruction::ZExt:
case Instruction::Trunc: {
Type *SrcTy = I->getOperand(0)->getType();
@@ -1020,13 +1078,23 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
- return (KnownZero << ShiftAmt) |
- APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ auto KZF = [BitWidth, NSW](const APInt &KnownZero, unsigned ShiftAmt) {
+ APInt KZResult =
+ (KnownZero << ShiftAmt) |
+ APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+      // If this shift has the "nsw" keyword, then the result is either a
+      // poison value or has the same sign bit as the first operand.
+ if (NSW && KnownZero.isNegative())
+ KZResult.setBit(BitWidth - 1);
+ return KZResult;
};
- auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
- return KnownOne << ShiftAmt;
+ auto KOF = [BitWidth, NSW](const APInt &KnownOne, unsigned ShiftAmt) {
+ APInt KOResult = KnownOne << ShiftAmt;
+ if (NSW && KnownOne.isNegative())
+ KOResult.setBit(BitWidth - 1);
+ return KOResult;
};
computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
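
The nsw handling in both callbacks rests on one fact: a left shift that does not signed-wrap preserves the sign bit of its first operand (otherwise the result is poison). An exhaustive i8 check of that fact, as a standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X) {
    for (unsigned C = 0; C < 8; ++C) {
      int Wide = X * (1 << C);      // Exact value of X << C, no i8 wrapping.
      if (Wide < -128 || Wide > 127)
        continue;                   // Would signed-wrap: nsw makes it poison.
      int8_t Narrow = (int8_t)Wide; // In range, so this is the i8 result.
      assert((Narrow < 0) == (X < 0)); // The sign bit is preserved.
    }
  }
  return 0;
}
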
@@ -1143,7 +1211,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(I);
+ const AllocaInst *AI = cast<AllocaInst>(I);
unsigned Align = AI->getAlignment();
if (Align == 0)
Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());
@@ -1163,7 +1231,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
gep_type_iterator GTI = gep_type_begin(I);
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
Value *Index = I->getOperand(i);
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
// Handle struct member offset arithmetic.
// Handle case when index is vector zeroinitializer
@@ -1200,7 +1268,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
break;
}
case Instruction::PHI: {
- PHINode *P = cast<PHINode>(I);
+ const PHINode *P = cast<PHINode>(I);
// Handle the case of a simple two-predecessor recurrence PHI.
// There's a lot more that could theoretically be done here, but
// this is sufficient to catch some interesting cases.
@@ -1237,9 +1305,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q);
- KnownZero = APInt::getLowBitsSet(BitWidth,
- std::min(KnownZero2.countTrailingOnes(),
- KnownZero3.countTrailingOnes()));
+ KnownZero = APInt::getLowBitsSet(
+ BitWidth, std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
+
+ if (DontImproveNonNegativePhiBits)
+ break;
+
+ auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
+ if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
+      // If the initial value of the recurrence is nonnegative and we are
+      // adding a nonnegative number with nsw, the result can only be
+      // nonnegative or a poison value regardless of how many times the add
+      // in the phi recurrence executes. If the initial value is negative and
+      // we are adding a negative number with nsw, the result can only be
+      // negative or a poison value. Similar arguments apply to sub and mul.
+ //
+ // (add non-negative, non-negative) --> non-negative
+ // (add negative, negative) --> negative
+ if (Opcode == Instruction::Add) {
+ if (KnownZero2.isNegative() && KnownZero3.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (KnownOne2.isNegative() && KnownOne3.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+ }
+
+ // (sub nsw non-negative, negative) --> non-negative
+ // (sub nsw negative, non-negative) --> negative
+ else if (Opcode == Instruction::Sub && LL == I) {
+ if (KnownZero2.isNegative() && KnownOne3.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (KnownOne2.isNegative() && KnownZero3.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+ }
+
+ // (mul nsw non-negative, non-negative) --> non-negative
+ else if (Opcode == Instruction::Mul && KnownZero2.isNegative() &&
+ KnownZero3.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ }
+
break;
}
}
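
Each sign rule above is the nsw analogue of ordinary integer arithmetic keeping signs. An exhaustive i8 spot-check of the add rules (sub and mul follow the same shape), standalone rather than LLVM code:

#include <cassert>

int main() {
  for (int A = -128; A <= 127; ++A) {
    for (int B = -128; B <= 127; ++B) {
      int Sum = A + B;
      if (Sum < -128 || Sum > 127)
        continue; // Signed wrap: an nsw add would yield poison here instead.
      if (A >= 0 && B >= 0)
        assert(Sum >= 0); // (add non-negative, non-negative) --> non-negative
      if (A < 0 && B < 0)
        assert(Sum < 0);  // (add negative, negative) --> negative
    }
  }
  return 0;
}
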
@@ -1284,12 +1389,12 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
// function.
if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
- if (Value *RV = CallSite(I).getReturnedArgOperand()) {
+ if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q);
KnownZero |= KnownZero2;
KnownOne |= KnownOne2;
}
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::bswap:
@@ -1326,9 +1431,16 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
}
break;
+ case Instruction::ExtractElement:
+ // Look through extract element. At the moment we keep this simple and skip
+ // tracking the specific element. But at least we might find information
+    // valid for all elements of the vector (for example, if the vector is
+    // sign-extended, shifted, etc.).
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ break;
case Instruction::ExtractValue:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
- ExtractValueInst *EVI = cast<ExtractValueInst>(I);
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
if (EVI->getNumIndices() != 1) break;
if (EVI->getIndices()[0] == 0) {
switch (II->getIntrinsicID()) {
@@ -1372,7 +1484,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned Depth, const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
@@ -1388,9 +1500,10 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownOne.getBitWidth() == BitWidth &&
"V, KnownOne and KnownZero should have same BitWidth");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // We know all of the bits for a constant!
- KnownOne = CI->getValue();
+ const APInt *C;
+ if (match(V, m_APInt(C))) {
+ // We know all of the bits for a scalar constant or a splat vector constant!
+ KnownOne = *C;
KnownZero = ~KnownOne;
return;
}
@@ -1402,7 +1515,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
// Handle a constant vector by taking the intersection of the known bits of
// each element.
- if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
// We know that CDS must be a vector of integers. Take the intersection of
// each element.
KnownZero.setAllBits(); KnownOne.setAllBits();
@@ -1415,7 +1528,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
return;
}
- if (auto *CV = dyn_cast<ConstantVector>(V)) {
+ if (const auto *CV = dyn_cast<ConstantVector>(V)) {
// We know that CV must be a vector of integers. Take the intersection of
// each element.
KnownZero.setAllBits(); KnownOne.setAllBits();
@@ -1438,6 +1551,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Start out not knowing anything.
KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ // We can't imply anything about undefs.
+ if (isa<UndefValue>(V))
+ return;
+
+ // There's no point in looking through other users of ConstantData for
+ // assumptions. Confirm that we've handled them all.
+ assert(!isa<ConstantData>(V) && "Unhandled constant data!");
+
// Limit search depth.
// All recursive calls that increase depth must come after this.
if (Depth == MaxDepth)
@@ -1445,13 +1566,13 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
// the bits of its aliasee.
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->isInterposable())
computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q);
return;
}
- if (Operator *I = dyn_cast<Operator>(V))
+ if (const Operator *I = dyn_cast<Operator>(V))
computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q);
// Aligned pointers have trailing zeros - refine KnownZero set
@@ -1472,7 +1593,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
/// Determine whether the sign bit is known to be zero or one.
/// Convenience wrapper around computeKnownBits.
-void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
unsigned Depth, const Query &Q) {
unsigned BitWidth = getBitWidth(V->getType(), Q.DL);
if (!BitWidth) {
@@ -1491,9 +1612,9 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
-bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q) {
- if (Constant *C = dyn_cast<Constant>(V)) {
+ if (const Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -1523,10 +1644,10 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
match(V, m_LShr(m_Value(X), m_Value()))))
return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);
- if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+ if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
- if (SelectInst *SI = dyn_cast<SelectInst>(V))
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V))
return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
@@ -1544,7 +1665,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
// Adding a power-of-two or zero to the same power-of-two or zero yields
// either the original power-of-two, a larger power-of-two or zero.
if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
- OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
+ const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
@@ -1590,7 +1711,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
/// to be non-null.
///
/// Currently this routine does not support vector GEPs.
-static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth,
+static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
const Query &Q) {
if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
return false;
@@ -1609,7 +1730,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth,
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
GTI != GTE; ++GTI) {
// Struct types are easy -- they must always be indexed by a constant.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = Q.DL.getStructLayout(STy);
@@ -1649,7 +1770,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth,
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never Value? 'RangeType' is
/// the type of the value described by the range.
-static bool rangeMetadataExcludesValue(MDNode* Ranges, const APInt& Value) {
+static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
const unsigned NumRanges = Ranges->getNumOperands() / 2;
assert(NumRanges >= 1);
for (unsigned i = 0; i < NumRanges; ++i) {
@@ -1668,7 +1789,7 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges, const APInt& Value) {
/// For vectors return true if every element is known to be non-zero when
/// defined. Supports values with integer or pointer type and vectors of
/// integers.
-bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
+bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (auto *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return false;
@@ -1712,7 +1833,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
if (V->getType()->isPointerTy()) {
if (isKnownNonNull(V))
return true;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, Depth, Q))
return true;
}
@@ -1732,7 +1853,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
// if the lowest bit is shifted off the end.
if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
// shl nuw can't remove any non-zero bits.
- OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
return isKnownNonZero(X, Depth, Q);
@@ -1746,7 +1867,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
// defined if the sign bit is shifted off the end.
else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
// shr exact can only shift out zero bits.
- PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
+ const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
return isKnownNonZero(X, Depth, Q);
@@ -1817,7 +1938,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
}
// X * Y.
else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
- OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
@@ -1825,13 +1946,13 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
return true;
}
// (C ? X : Y) != 0 if X != 0 and Y != 0.
- else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
if (isKnownNonZero(SI->getTrueValue(), Depth, Q) &&
isKnownNonZero(SI->getFalseValue(), Depth, Q))
return true;
}
// PHI
- else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
// Try and detect a recurrence that monotonically increases from a
// starting value, as these are common as induction variables.
if (PN->getNumIncomingValues() == 2) {
@@ -1865,8 +1986,8 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
}
/// Return true if V2 == V1 + X, where X is known non-zero.
-static bool isAddOfNonZero(Value *V1, Value *V2, const Query &Q) {
- BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
+static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
+ const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
if (!BO || BO->getOpcode() != Instruction::Add)
return false;
Value *Op = nullptr;
@@ -1880,7 +2001,7 @@ static bool isAddOfNonZero(Value *V1, Value *V2, const Query &Q) {
}
/// Return true if it is known that V1 != V2.
-static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q) {
+static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
if (V1->getType()->isVectorTy() || V1 == V2)
return false;
if (V1->getType() != V2->getType())
@@ -1916,7 +2037,7 @@ static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q) {
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth,
+bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
@@ -1927,8 +2048,9 @@ bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth,
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
/// element with the minimum number of sign bits.
-static unsigned computeNumSignBitsVectorConstant(Value *V, unsigned TyBits) {
- auto *CV = dyn_cast<Constant>(V);
+static unsigned computeNumSignBitsVectorConstant(const Value *V,
+ unsigned TyBits) {
+ const auto *CV = dyn_cast<Constant>(V);
if (!CV || !CV->getType()->isVectorTy())
return 0;
@@ -1956,7 +2078,7 @@ static unsigned computeNumSignBitsVectorConstant(Value *V, unsigned TyBits) {
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
/// other, so we return 3. For vectors, return the number of sign bits for the
/// vector element with the minimum number of known sign bits.
-unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
+unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType());
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -1964,10 +2086,10 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
// Note that ConstantInt is handled by the general computeKnownBits case
// below.
- if (Depth == 6)
+ if (Depth == MaxDepth)
return 1; // Limit search depth.
- Operator *U = dyn_cast<Operator>(V);
+ const Operator *U = dyn_cast<Operator>(V);
switch (Operator::getOpcode(V)) {
default: break;
case Instruction::SExt:
@@ -2125,7 +2247,7 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
return std::min(Tmp, Tmp2)-1;
case Instruction::PHI: {
- PHINode *PN = cast<PHINode>(U);
+ const PHINode *PN = cast<PHINode>(U);
unsigned NumIncomingValues = PN->getNumIncomingValues();
// Don't analyze large in-degree PHIs.
if (NumIncomingValues > 4) break;
@@ -2147,6 +2269,13 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
break;
+
+ case Instruction::ExtractElement:
+ // Look through extract element. At the moment we keep this simple and skip
+ // tracking the specific element. But at least we might find information
+    // valid for all elements of the vector (for example, if the vector is
+    // sign-extended, shifted, etc.).
+ return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -2416,7 +2545,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
- if (Depth == 6)
+ if (Depth == MaxDepth)
return false; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
@@ -2463,7 +2592,7 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeNegativeZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
- if (Depth == 6)
+ if (Depth == MaxDepth)
return false; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
@@ -2478,7 +2607,7 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
// x*x is always non-negative or a NaN.
if (I->getOperand(0) == I->getOperand(1))
return true;
- // Fall through
+ LLVM_FALLTHROUGH;
case Instruction::FAdd:
case Instruction::FDiv:
case Instruction::FRem:
@@ -2768,11 +2897,17 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- APInt GEPOffset(BitWidth, 0);
+ // If one of the values we have visited is an addrspacecast, then
+ // the pointer type of this GEP may be different from the type
+ // of the Ptr parameter which was passed to this function. This
+ // means when we construct GEPOffset, we need to use the size
+ // of GEP's pointer type rather than the size of the original
+ // pointer type.
+ APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
- ByteOffset += GEPOffset;
+ ByteOffset += GEPOffset.getSExtValue();
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
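
The switch to getSExtValue() matters once pointer widths can differ: the narrower per-GEP offset must be sign-extended into the accumulated offset, or a small negative offset turns into a huge positive one. A plain-integer illustration with made-up widths and values:

#include <cassert>
#include <cstdint>

int main() {
  int64_t ByteOffset = 100; // Accumulated so far, 64-bit pointer width.
  int32_t GEPOffset = -8;   // This GEP indexes a 32-bit address space.

  // Zero-extending the narrower offset produces a nonsense result:
  int64_t Bad = ByteOffset + (int64_t)(uint32_t)GEPOffset;
  assert(Bad == 100 + 4294967288LL);

  // Sign-extending, as getSExtValue() does, keeps the intended -8:
  int64_t Good = ByteOffset + (int64_t)GEPOffset;
  assert(Good == 92);
  return 0;
}
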
@@ -2886,13 +3021,14 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
+static uint64_t GetStringLengthH(const Value *V,
+ SmallPtrSetImpl<const PHINode*> &PHIs) {
// Look through noop bitcast instructions.
V = V->stripPointerCasts();
// If this is a PHI node, there are two cases: either we have already seen it
// or we haven't.
- if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
if (!PHIs.insert(PN).second)
return ~0ULL; // already in the set.
@@ -2914,7 +3050,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
}
// strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
- if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
if (Len1 == 0) return 0;
uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
@@ -2935,10 +3071,10 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-uint64_t llvm::GetStringLength(Value *V) {
+uint64_t llvm::GetStringLength(const Value *V) {
if (!V->getType()->isPointerTy()) return 0;
- SmallPtrSet<PHINode*, 32> PHIs;
+ SmallPtrSet<const PHINode*, 32> PHIs;
uint64_t Len = GetStringLengthH(V, PHIs);
// If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
// an empty string as a length.
@@ -2947,7 +3083,8 @@ uint64_t llvm::GetStringLength(Value *V) {
/// \brief \p PN defines a loop-variant pointer to an object. Check if the
/// previous iteration of the loop was referring to the same object as \p PN.
-static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) {
+static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
+ const LoopInfo *LI) {
// Find the loop-defined value.
Loop *L = LI->getLoopFor(PN->getParent());
if (PN->getNumIncomingValues() != 2)
@@ -3208,11 +3345,11 @@ bool llvm::isKnownNonNull(const Value *V) {
if (const Argument *A = dyn_cast<Argument>(V))
return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
- // A global variable in address space 0 is non null unless extern weak.
- // Other address spaces may have null as a valid address for a global,
- // so we can't assume anything.
+ // A global variable in address space 0 is non null unless extern weak
+ // or an absolute symbol reference. Other address spaces may have null as a
+ // valid address for a global, so we can't assume anything.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return !GV->hasExternalWeakLinkage() &&
+ return !GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
GV->getType()->getAddressSpace() == 0;
// A Load tagged with nonnull metadata is never null.
@@ -3230,6 +3367,9 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
assert(V->getType()->isPointerTy() && "V must be pointer type");
+ assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");
+ assert(CtxI && "Context instruction required for analysis");
+ assert(DT && "Dominator tree required for analysis");
unsigned NumUsesExplored = 0;
for (auto *U : V->users()) {
@@ -3266,13 +3406,20 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
const DominatorTree *DT) {
+ if (isa<ConstantPointerNull>(V) || isa<UndefValue>(V))
+ return false;
+
if (isKnownNonNull(V))
return true;
- return CtxI ? ::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false;
+ if (!CtxI || !DT)
+ return false;
+
+ return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT);
}
-OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
+OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3322,7 +3469,8 @@ OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
-OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
+OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3351,9 +3499,13 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
-static OverflowResult computeOverflowForSignedAdd(
- Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) {
+static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
+ const Value *RHS,
+ const AddOperator *Add,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
if (Add && Add->hasNoSignedWrap()) {
return OverflowResult::NeverOverflows;
}
@@ -3395,7 +3547,8 @@ static OverflowResult computeOverflowForSignedAdd(
return OverflowResult::MayOverflow;
}
-bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
+bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+ const DominatorTree &DT) {
#ifndef NDEBUG
auto IID = II->getIntrinsicID();
assert((IID == Intrinsic::sadd_with_overflow ||
@@ -3407,11 +3560,11 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
"Not an overflow intrinsic!");
#endif
- SmallVector<BranchInst *, 2> GuardingBranches;
- SmallVector<ExtractValueInst *, 2> Results;
+ SmallVector<const BranchInst *, 2> GuardingBranches;
+ SmallVector<const ExtractValueInst *, 2> Results;
- for (User *U : II->users()) {
- if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
+ for (const User *U : II->users()) {
+ if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
if (EVI->getIndices()[0] == 0)
@@ -3419,8 +3572,8 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
else {
assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
- for (auto *U : EVI->users())
- if (auto *B = dyn_cast<BranchInst>(U)) {
+ for (const auto *U : EVI->users())
+ if (const auto *B = dyn_cast<BranchInst>(U)) {
assert(B->isConditional() && "How else is it using an i1?");
GuardingBranches.push_back(B);
}
@@ -3432,13 +3585,13 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
}
}
- auto AllUsesGuardedByBranch = [&](BranchInst *BI) {
+ auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
if (!NoWrapEdge.isSingleEdge())
return false;
// Check if all users of the add are provably no-wrap.
- for (auto *Result : Results) {
+ for (const auto *Result : Results) {
      // If the extractvalue itself is not executed on overflow, then we don't
// need to check each use separately, since domination is transitive.
if (DT.dominates(NoWrapEdge, Result->getParent()))
@@ -3456,7 +3609,7 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
}
-OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
+OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3465,7 +3618,8 @@ OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
Add, DL, AC, CxtI, DT);
}
-OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3502,12 +3656,27 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
return false;
// Calls can throw, or contain an infinite loop, or kill the process.
- if (CallSite CS = CallSite(const_cast<Instruction*>(I))) {
- // Calls which don't write to arbitrary memory are safe.
- // FIXME: Ignoring infinite loops without any side-effects is too aggressive,
- // but it's consistent with other passes. See http://llvm.org/PR965 .
- // FIXME: This isn't aggressive enough; a call which only writes to a
- // global is guaranteed to return.
+ if (auto CS = ImmutableCallSite(I)) {
+ // Call sites that throw have implicit non-local control flow.
+ if (!CS.doesNotThrow())
+ return false;
+
+ // Non-throwing call sites can loop infinitely, call exit/pthread_exit
+ // etc. and thus not return. However, LLVM already assumes that
+ //
+ // - Thread exiting actions are modeled as writes to memory invisible to
+ // the program.
+ //
+ // - Loops that don't have side effects (side effects are volatile/atomic
+ // stores and IO) always terminate (see http://llvm.org/PR965).
+ // Furthermore IO itself is also modeled as writes to memory invisible to
+ // the program.
+ //
+ // We rely on those assumptions here, and use the memory effects of the call
+ // target as a proxy for checking that it always returns.
+
+ // FIXME: This isn't aggressive enough; a call which only writes to a global
+ // is guaranteed to return.
return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
match(I, m_Intrinsic<Intrinsic::assume>());
}
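
The new call-site logic is a pure predicate over call attributes. A condensed standalone model (a hypothetical CallSiteDesc stands in for ImmutableCallSite and its attribute queries):

#include <cassert>

struct CallSiteDesc {
  bool DoesNotThrow;          // nounwind
  bool OnlyReadsMemory;       // readonly / readnone
  bool OnlyAccessesArgMemory; // argmemonly
  bool IsAssume;              // llvm.assume
};

// Mirrors the check above: a potentially-throwing call has non-local control
// flow; a nounwind call is assumed to return when its memory effects rule out
// the invisible "never returns" writes the comments describe.
static bool transfersExecution(const CallSiteDesc &CS) {
  if (!CS.DoesNotThrow)
    return false;
  return CS.OnlyReadsMemory || CS.OnlyAccessesArgMemory || CS.IsAssume;
}

int main() {
  assert(!transfersExecution({false, true, false, false})); // May throw.
  assert(transfersExecution({true, true, false, false}));   // nounwind readonly.
  assert(!transfersExecution({true, false, false, false})); // May not return.
  return 0;
}
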
@@ -3688,7 +3857,7 @@ bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
return false;
}
-static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
if (FMF.noNaNs())
return true;
@@ -3697,12 +3866,90 @@ static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
return false;
}
-static bool isKnownNonZero(Value *V) {
+static bool isKnownNonZero(const Value *V) {
if (auto *C = dyn_cast<ConstantFP>(V))
return !C->isZero();
return false;
}
+/// Match non-obvious integer minimum and maximum sequences.
+static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal,
+ Value *&LHS, Value *&RHS) {
+ if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ // Z = X -nsw Y
+ // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
+ // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
+ if (match(TrueVal, m_Zero()) &&
+ match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
+ }
+
+ // Z = X -nsw Y
+ // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
+ // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
+ if (match(FalseVal, m_Zero()) &&
+ match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
+ }
+
+ const APInt *C1;
+ if (!match(CmpRHS, m_APInt(C1)))
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ // An unsigned min/max can be written with a signed compare.
+ const APInt *C2;
+ if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
+ (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
+ // Is the sign bit set?
+ // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
+ // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
+ if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
+ }
+
+ // Is the sign bit clear?
+ // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
+ // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
+ if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
+ C2->isMinSignedValue()) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
+ }
+ }
+
+ // Look through 'not' ops to find disguised signed min/max.
+ // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
+ // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
+ if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
+ match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
+ }
+
+ // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X)
+ // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X)
+ if (match(FalseVal, m_Not(m_Specific(CmpLHS))) &&
+ match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
+ }
+
+ return {SPF_UNKNOWN, SPNB_NA, false};
+}
+
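
The 'not' rewrites above work because bitwise negation reverses signed order (~X == -1 - X). A brute-force i8 confirmation of the first of them, (X >s C) ? ~X : ~C == SMIN(~X, ~C), as a standalone check:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X) {
    for (int C = -128; C <= 127; ++C) {
      int8_t NotX = (int8_t)~(uint8_t)X; // ~X == -1 - X on i8.
      int8_t NotC = (int8_t)~(uint8_t)C;
      int8_t Sel = (X > C) ? NotX : NotC;
      assert(Sel == std::min(NotX, NotC));
    }
  }
  return 0;
}
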
static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
FastMathFlags FMF,
Value *CmpLHS, Value *CmpRHS,
@@ -3801,39 +4048,26 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
}
}
- if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
+ const APInt *C1;
+ if (match(CmpRHS, m_APInt(C1))) {
if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
(CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
// ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
// NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
- if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
+ if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) {
return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
// ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
// NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
- if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
+ if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) {
return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
}
-
- // Y >s C ? ~Y : ~C == ~Y <s ~C ? ~Y : ~C = SMIN(~Y, ~C)
- if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) {
- if (Pred == ICmpInst::ICMP_SGT && C1->getType() == C2->getType() &&
- ~C1->getValue() == C2->getValue() &&
- (match(TrueVal, m_Not(m_Specific(CmpLHS))) ||
- match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
- LHS = TrueVal;
- RHS = FalseVal;
- return {SPF_SMIN, SPNB_NA, false};
- }
- }
}
- // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
-
- return {SPF_UNKNOWN, SPNB_NA, false};
+ return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
@@ -3932,30 +4166,9 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
LHS, RHS);
}
-ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) {
- const unsigned NumRanges = Ranges.getNumOperands() / 2;
- assert(NumRanges >= 1 && "Must have at least one range!");
- assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
-
- auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
- auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
-
- ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
-
- for (unsigned i = 1; i < NumRanges; ++i) {
- auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
- auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
-
- // Note: unionWith will potentially create a range that contains values not
- // contained in any of the original N ranges.
- CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
- }
-
- return CR;
-}
-
/// Return true if "icmp Pred LHS RHS" is always true.
-static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+static bool isTruePredicate(CmpInst::Predicate Pred,
+ const Value *LHS, const Value *RHS,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
@@ -3984,7 +4197,8 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return true;
// Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
- auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X,
+ auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B,
+ const Value *&X,
const APInt *&CA, const APInt *&CB) {
if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
@@ -4004,7 +4218,7 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return false;
};
- Value *X;
+ const Value *X;
const APInt *CLHS, *CRHS;
if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
return CLHS->ule(*CRHS);
@@ -4017,8 +4231,9 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
/// ALHS ARHS" is true. Otherwise, return None.
static Optional<bool>
-isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, Value *ARHS,
- Value *BLHS, Value *BRHS, const DataLayout &DL,
+isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
+ const Value *ARHS, const Value *BLHS,
+ const Value *BRHS, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT) {
switch (Pred) {
@@ -4045,7 +4260,8 @@ isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, Value *ARHS,
/// Return true if the operands of the two compares match. IsSwappedOps is true
/// when the operands match, but are swapped.
-static bool isMatchingOps(Value *ALHS, Value *ARHS, Value *BLHS, Value *BRHS,
+static bool isMatchingOps(const Value *ALHS, const Value *ARHS,
+ const Value *BLHS, const Value *BRHS,
bool &IsSwappedOps) {
bool IsMatchingOps = (ALHS == BLHS && ARHS == BRHS);
@@ -4057,9 +4273,11 @@ static bool isMatchingOps(Value *ALHS, Value *ARHS, Value *BLHS, Value *BRHS,
/// true. Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS
/// BRHS" is false. Otherwise, return None if we can't infer anything.
static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
- Value *ALHS, Value *ARHS,
+ const Value *ALHS,
+ const Value *ARHS,
CmpInst::Predicate BPred,
- Value *BLHS, Value *BRHS,
+ const Value *BLHS,
+ const Value *BRHS,
bool IsSwappedOps) {
// Canonicalize the operands so they're matching.
if (IsSwappedOps) {
@@ -4078,9 +4296,10 @@ static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
/// true. Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS
/// C2" is false. Otherwise, return None if we can't infer anything.
static Optional<bool>
-isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, Value *ALHS,
- ConstantInt *C1, CmpInst::Predicate BPred,
- Value *BLHS, ConstantInt *C2) {
+isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
+ const ConstantInt *C1,
+ CmpInst::Predicate BPred,
+ const Value *BLHS, const ConstantInt *C2) {
assert(ALHS == BLHS && "LHS operands must match.");
ConstantRange DomCR =
ConstantRange::makeExactICmpRegion(APred, C1->getValue());
@@ -4095,7 +4314,7 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, Value *ALHS,
return None;
}
-Optional<bool> llvm::isImpliedCondition(Value *LHS, Value *RHS,
+Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
const DataLayout &DL, bool InvertAPred,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 53e7153a350f..7e598f435ff5 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -107,11 +107,11 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
// Find the type we're currently indexing into.
gep_type_iterator GEPTI = gep_type_begin(Gep);
- std::advance(GEPTI, LastOperand - 1);
+ std::advance(GEPTI, LastOperand - 2);
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
- if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
break;
--LastOperand;
}
@@ -454,9 +454,10 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
- for (auto Kind : { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_nontemporal }) {
+ for (auto Kind :
+ {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
@@ -469,13 +470,12 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
case LLVMContext::MD_alias_scope:
MD = MDNode::getMostGenericAliasScope(MD, IMD);
break;
- case LLVMContext::MD_noalias:
- MD = MDNode::intersect(MD, IMD);
- break;
case LLVMContext::MD_fpmath:
MD = MDNode::getMostGenericFPMath(MD, IMD);
break;
+ case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
default: