Diffstat (limited to 'include/llvm/Analysis')
-rw-r--r--  include/llvm/Analysis/AliasAnalysis.h                |   14
-rw-r--r--  include/llvm/Analysis/AliasSetTracker.h              |    7
-rw-r--r--  include/llvm/Analysis/AssumptionCache.h              |   21
-rw-r--r--  include/llvm/Analysis/BasicAliasAnalysis.h           |   18
-rw-r--r--  include/llvm/Analysis/BlockFrequencyInfo.h           |   10
-rw-r--r--  include/llvm/Analysis/BlockFrequencyInfoImpl.h       |    7
-rw-r--r--  include/llvm/Analysis/BranchProbabilityInfo.h        |    2
-rw-r--r--  include/llvm/Analysis/CFGPrinter.h                   |    3
-rw-r--r--  include/llvm/Analysis/CGSCCPassManager.h             |    1
-rw-r--r--  include/llvm/Analysis/CallGraph.h                    |    2
-rw-r--r--  include/llvm/Analysis/ConstantFolding.h              |    6
-rw-r--r--  include/llvm/Analysis/DominanceFrontier.h            |    4
-rw-r--r--  include/llvm/Analysis/IndirectCallSiteVisitor.h      |   12
-rw-r--r--  include/llvm/Analysis/InlineCost.h                   |    6
-rw-r--r--  include/llvm/Analysis/InstructionSimplify.h          |   12
-rw-r--r--  include/llvm/Analysis/LazyBlockFrequencyInfo.h       |  104
-rw-r--r--  include/llvm/Analysis/LazyBranchProbabilityInfo.h    |   12
-rw-r--r--  include/llvm/Analysis/LazyCallGraph.h                |  493
-rw-r--r--  include/llvm/Analysis/LazyValueInfo.h                |   15
-rw-r--r--  include/llvm/Analysis/Loads.h                        |   31
-rw-r--r--  include/llvm/Analysis/LoopAccessAnalysis.h           |   37
-rw-r--r--  include/llvm/Analysis/LoopInfo.h                     |   30
-rw-r--r--  include/llvm/Analysis/LoopInfoImpl.h                 |  150
-rw-r--r--  include/llvm/Analysis/MemoryBuiltins.h               |   43
-rw-r--r--  include/llvm/Analysis/MemorySSA.h                    | 1155
-rw-r--r--  include/llvm/Analysis/MemorySSAUpdater.h             |  153
-rw-r--r--  include/llvm/Analysis/ObjectUtils.h                  |   42
-rw-r--r--  include/llvm/Analysis/OptimizationDiagnosticInfo.h   |  150
-rw-r--r--  include/llvm/Analysis/PostDominators.h               |    4
-rw-r--r--  include/llvm/Analysis/ProfileSummaryInfo.h           |   16
-rw-r--r--  include/llvm/Analysis/PtrUseVisitor.h                |    5
-rw-r--r--  include/llvm/Analysis/RegionInfo.h                   |    4
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h              |   71
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpressions.h   |   66
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionNormalization.h |   45
-rw-r--r--  include/llvm/Analysis/TargetLibraryInfo.def          |    2
-rw-r--r--  include/llvm/Analysis/TargetLibraryInfo.h            |   78
-rw-r--r--  include/llvm/Analysis/TargetTransformInfo.h          |  161
-rw-r--r--  include/llvm/Analysis/TargetTransformInfoImpl.h      |   33
-rw-r--r--  include/llvm/Analysis/TypeMetadataUtils.h            |    5
-rw-r--r--  include/llvm/Analysis/ValueTracking.h                |   32
-rw-r--r--  include/llvm/Analysis/VectorUtils.h                  |   59
42 files changed, 2408 insertions(+), 713 deletions(-)
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index d8e50438e722..1b8b9751faa1 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -443,11 +443,7 @@ public:
/// getModRefInfo (for fences) - Return information about whether
/// a particular store modifies or reads the specified memory location.
- ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
- // Conservatively correct. (We could possibly be a bit smarter if
- // Loc is a alloca that doesn't escape.)
- return MRI_ModRef;
- }
+ ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc);
/// getModRefInfo (for fences) - A convenience wrapper.
ModRefInfo getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size) {
@@ -528,6 +524,14 @@ public:
/// Check whether or not an instruction may read or write the specified
/// memory location.
///
+ /// Note explicitly that getModRefInfo considers the effects of reading and
+ /// writing the memory location, and not the effect of ordering relative to
+ /// other instructions. Thus, a volatile load is considered to be Ref,
+ /// because it does not actually write memory; it just can't be reordered
+ /// relative to other volatiles (or removed). Atomic ordered loads/stores are
+ /// currently considered ModRef because, conservatively, the visible effect
+ /// appears as if memory was written, not just an ordering constraint.
+ ///
/// An instruction that doesn't read or write memory may be trivially LICM'd
/// for example.
///
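A minimal sketch of what this distinction means for a client, assuming an AAResults &AA, a MemoryLocation Loc, and an Instruction &I are already in scope (the names are illustrative, not from this patch):

  // A volatile load only reports Ref: it does not write Loc, it merely
  // can't be reordered or removed. An ordered atomic load reports
  // ModRef and must be treated as if it could write memory.
  if (auto *LI = dyn_cast<LoadInst>(&I)) {
    ModRefInfo MRI = AA.getModRefInfo(LI, Loc);
    if (MRI == MRI_Ref) {
      // The instruction does not modify Loc, even though the access
      // itself may still be volatile and thus unmovable.
    }
  }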
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 5d11b22c6eed..eac97501c759 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -121,7 +121,10 @@ class AliasSet : public ilist_node<AliasSet> {
AliasSet *Forward;
/// All instructions without a specific address in this alias set.
- std::vector<AssertingVH<Instruction> > UnknownInsts;
+ /// In rare cases this vector can contain null'ed out WeakVH
+ /// instances (this can happen if some other loop pass deletes an
+ /// instruction in this list).
+ std::vector<WeakVH> UnknownInsts;
/// Number of nodes pointing to this AliasSet plus the number of AliasSets
/// forwarding to it.
@@ -171,7 +174,7 @@ class AliasSet : public ilist_node<AliasSet> {
Instruction *getUnknownInst(unsigned i) const {
assert(i < UnknownInsts.size());
- return UnknownInsts[i];
+ return cast_or_null<Instruction>(UnknownInsts[i]);
}
public:
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index 79287ed76f2e..f833f417c7dd 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -31,11 +31,10 @@ namespace llvm {
/// \brief A cache of @llvm.assume calls within a function.
///
/// This cache provides fast lookup of assumptions within a function by caching
-/// them and amortizing the cost of scanning for them across all queries. The
-/// cache is also conservatively self-updating so that it will never return
-/// incorrect results about a function even as the function is being mutated.
-/// However, flushing the cache and rebuilding it (or explicitly updating it)
-/// may allow it to discover new assumptions.
+/// them and amortizing the cost of scanning for them across all queries. Passes
+/// that create new assumptions are required to call registerAssumption() to
+/// register any new @llvm.assume calls that they create. Deletions of
+/// @llvm.assume calls do not require special handling.
class AssumptionCache {
/// \brief The function for which this cache is handling assumptions.
///
@@ -87,6 +86,13 @@ public:
/// its instructions.
AssumptionCache(Function &F) : F(F), Scanned(false) {}
+ /// This cache is designed to be self-updating and so it should never be
+ /// invalidated.
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
+ return false;
+ }
+
/// \brief Add an @llvm.assume intrinsic to this function's cache.
///
/// The call passed in must be an instruction within this function and must
@@ -196,7 +202,10 @@ public:
AssumptionCacheTracker();
~AssumptionCacheTracker() override;
- void releaseMemory() override { AssumptionCaches.shrink_and_clear(); }
+ void releaseMemory() override {
+ verifyAnalysis();
+ AssumptionCaches.shrink_and_clear();
+ }
void verifyAnalysis() const override;
bool doFinalization(Module &) override {
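To illustrate the registration requirement above, a hypothetical transform that materializes a new assumption might look like this sketch (AC is the function's AssumptionCache, Builder a positioned IRBuilder<>, and Ptr an arbitrary pointer Value*):

  // Create a new @llvm.assume call and hand it to the cache; without
  // the registerAssumption() call, later cached queries could miss it.
  Value *Cond =
      Builder.CreateICmpNE(Ptr, Constant::getNullValue(Ptr->getType()));
  CallInst *Assume = Builder.CreateAssumption(Cond);
  AC.registerAssumption(Assume);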
diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h
index addfffa01061..14e4bded264a 100644
--- a/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -233,6 +233,24 @@ FunctionPass *createBasicAAWrapperPass();
/// populated to the best of our ability for a particular function when inside
/// of a \c ModulePass or a \c CallGraphSCCPass.
BasicAAResult createLegacyPMBasicAAResult(Pass &P, Function &F);
+
+/// This class is a functor to be used in legacy module or SCC passes for
+/// computing AA results for a function. We store the results in fields so that
+/// they live long enough to be queried, but we re-use them each time.
+class LegacyAARGetter {
+ Pass &P;
+ Optional<BasicAAResult> BAR;
+ Optional<AAResults> AAR;
+
+public:
+ LegacyAARGetter(Pass &P) : P(P) {}
+ AAResults &operator()(Function &F) {
+ BAR.emplace(createLegacyPMBasicAAResult(P, F));
+ AAR.emplace(createLegacyPMAAResults(P, F, *BAR));
+ return *AAR;
+ }
+};
+
}
#endif
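A short usage sketch for the functor above, as it might appear in a legacy module pass (the loop body is illustrative):

  // Build AA results lazily, once per function; each call re-uses the
  // functor's internal storage, so only the most recent result is
  // valid at any point.
  LegacyAARGetter AARGetter(*this);
  for (Function &F : M) {
    if (F.isDeclaration())
      continue;
    AAResults &AA = AARGetter(F);
    // ... query AA about instructions in F before moving on ...
  }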
diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h
index 562041d11fa1..cbae01c9102f 100644
--- a/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -45,6 +45,10 @@ public:
~BlockFrequencyInfo();
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
+
const Function *getFunction() const;
const BranchProbabilityInfo *getBPI() const;
void view() const;
@@ -69,6 +73,12 @@ public:
// Set the frequency of the given basic block.
void setBlockFreq(const BasicBlock *BB, uint64_t Freq);
+ /// Set the frequency of \p ReferenceBB to \p Freq and scale the frequencies
+ /// of the blocks in \p BlocksToScale such that their frequencies relative
+ /// to \p ReferenceBB remain unchanged.
+ void setBlockFreqAndScale(const BasicBlock *ReferenceBB, uint64_t Freq,
+ SmallPtrSetImpl<BasicBlock *> &BlocksToScale);
+
/// calculate - compute block frequency info for the given function.
void calculate(const Function &F, const BranchProbabilityInfo &BPI,
const LoopInfo &LI);
diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 3f4428d18740..e3d81fea49ea 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -1291,11 +1291,14 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
}
std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph,
- GVDAGType GType) {
+ GVDAGType GType, int layout_order = -1) {
std::string Result;
raw_string_ostream OS(Result);
- OS << Node->getName().str() << " : ";
+ if (layout_order != -1)
+ OS << Node->getName() << "[" << layout_order << "] : ";
+ else
+ OS << Node->getName() << " : ";
switch (GType) {
case GVDT_Fraction:
Graph->printBlockFreq(OS, Node);
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 14b7a7f529f7..6a876679543d 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -164,6 +164,8 @@ private:
/// \brief Track the set of blocks that always lead to a cold call.
SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall;
+ void updatePostDominatedByUnreachable(const BasicBlock *BB);
+ void updatePostDominatedByColdCall(const BasicBlock *BB);
bool calcUnreachableHeuristics(const BasicBlock *BB);
bool calcMetadataWeights(const BasicBlock *BB);
bool calcColdCallHeuristics(const BasicBlock *BB);
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index efaa9d6df8ea..5786769cc500 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -140,8 +140,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
std::string Str;
raw_string_ostream OS(Str);
- SwitchInst::ConstCaseIt Case =
- SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo);
+ auto Case = *SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo);
OS << Case.getCaseValue()->getValue();
return OS.str();
}
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 6fbe532112b2..398bbfb0c413 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -334,6 +334,7 @@ public:
InvalidSCCSet, nullptr, nullptr};
PreservedAnalyses PA = PreservedAnalyses::all();
+ CG.buildRefSCCs();
for (auto RCI = CG.postorder_ref_scc_begin(),
RCE = CG.postorder_ref_scc_end();
RCI != RCE;) {
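The new buildRefSCCs() call reflects that RefSCC formation is no longer triggered implicitly by the iterator (see the LazyCallGraph changes below). A caller outside the pass manager would follow the same pattern, sketched here:

  // Form the RefSCC DAG once, then walk it in post-order. Iterating
  // without the explicit build step now asserts.
  CG.buildRefSCCs();
  for (LazyCallGraph::RefSCC &RC :
       make_range(CG.postorder_ref_scc_begin(), CG.postorder_ref_scc_end())) {
    // ... process each RefSCC ...
  }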
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index 4ecbaa75ac75..ea85436ee580 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -272,7 +272,7 @@ public:
private:
friend class CallGraph;
- AssertingVH<Function> F;
+ Function *F;
std::vector<CallRecord> CalledFunctions;
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index 517842c8b0dc..ff6ca1959153 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -100,6 +100,12 @@ Constant *ConstantFoldExtractValueInstruction(Constant *Agg,
/// successful; if not, null is returned.
Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx);
+/// \brief Attempt to constant fold a shufflevector instruction with the
+/// specified operands and indices. The constant result is returned if
+/// successful; if not, null is returned.
+Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
+ Constant *Mask);
+
/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
/// produce if it is constant and determinable. If this is not determinable,
/// return null.
diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h
index b9667f801ed3..8cae63c3c869 100644
--- a/include/llvm/Analysis/DominanceFrontier.h
+++ b/include/llvm/Analysis/DominanceFrontier.h
@@ -141,6 +141,10 @@ public:
typedef DominanceFrontierBase<BasicBlock>::DomSetType DomSetType;
typedef DominanceFrontierBase<BasicBlock>::iterator iterator;
typedef DominanceFrontierBase<BasicBlock>::const_iterator const_iterator;
+
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
};
class DominanceFrontierWrapperPass : public FunctionPass {
diff --git a/include/llvm/Analysis/IndirectCallSiteVisitor.h b/include/llvm/Analysis/IndirectCallSiteVisitor.h
index 71a8cb886321..3c40cc0235cc 100644
--- a/include/llvm/Analysis/IndirectCallSiteVisitor.h
+++ b/include/llvm/Analysis/IndirectCallSiteVisitor.h
@@ -21,16 +21,8 @@ struct PGOIndirectCallSiteVisitor
PGOIndirectCallSiteVisitor() {}
void visitCallSite(CallSite CS) {
- if (CS.getCalledFunction() || !CS.getCalledValue())
- return;
- Instruction *I = CS.getInstruction();
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->isInlineAsm())
- return;
- }
- if (isa<Constant>(CS.getCalledValue()))
- return;
- IndirectCallInsts.push_back(I);
+ if (CS.isIndirectCall())
+ IndirectCallInsts.push_back(CS.getInstruction());
}
};
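A minimal sketch of driving the visitor over a function (InstVisitor supplies the visit() entry point; IndirectCallInsts is the visitor's result vector, not shown in this hunk):

  // Collect every indirect call site in F; CS.isIndirectCall() now
  // centralizes the "not direct, not inline asm, not constant" checks
  // that were previously open-coded above.
  PGOIndirectCallSiteVisitor ICV;
  ICV.visit(F);
  for (Instruction *I : ICV.IndirectCallInsts) {
    // ... each I calls through a non-constant function pointer ...
  }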
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 5e7b00261f63..17e5cb6db02d 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -21,6 +21,7 @@
namespace llvm {
class AssumptionCacheTracker;
+class BlockFrequencyInfo;
class CallSite;
class DataLayout;
class Function;
@@ -137,6 +138,9 @@ struct InlineParams {
/// Threshold to use when the callsite is considered hot.
Optional<int> HotCallSiteThreshold;
+
+ /// Threshold to use when the callsite is considered cold.
+ Optional<int> ColdCallSiteThreshold;
};
/// Generate the parameters to tune the inline cost analysis based only on the
@@ -171,6 +175,7 @@ InlineCost
getInlineCost(CallSite CS, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI);
/// \brief Get an InlineCost with the callee explicitly specified.
@@ -182,6 +187,7 @@ InlineCost
getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI);
/// \brief Minimal filter to detect invalid constructs for inlining.
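Call sites of getInlineCost pick up the new parameter; a caller without block frequency information simply passes None. A sketch, with CS, Params, CalleeTTI, GetAssumptionCache, and PSI as in the declaration and tryToInline a hypothetical follow-up:

  // GetBFI is optional: pass None when no BFI is available, or wrap a
  // callable returning the caller's BlockFrequencyInfo for a function.
  InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache,
                                /*GetBFI=*/None, PSI);
  if (IC)
    tryToInline(CS);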
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 47d6118313cb..b829e995db05 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -42,6 +42,7 @@ namespace llvm {
class Instruction;
class DataLayout;
class FastMathFlags;
+ class OptimizationRemarkEmitter;
class TargetLibraryInfo;
class Type;
class Value;
@@ -246,6 +247,14 @@ namespace llvm {
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr);
+ /// Given operands for a ShuffleVectorInst, fold the result or return null.
+ Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
+ Type *RetTy, const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr,
+ const DominatorTree *DT = nullptr,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr);
+
//=== Helper functions for higher up the class hierarchy.
@@ -296,7 +305,8 @@ namespace llvm {
Value *SimplifyInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI = nullptr,
const DominatorTree *DT = nullptr,
- AssumptionCache *AC = nullptr);
+ AssumptionCache *AC = nullptr,
+ OptimizationRemarkEmitter *ORE = nullptr);
/// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
///
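A sketch of folding through the new shufflevector entry point, assuming an existing ShuffleVectorInst *SVI and the module's DataLayout DL:

  // Try to fold the shuffle; the optional TLI/DT/AC/CxtI analyses are
  // omitted here and default to nullptr.
  if (Value *V = SimplifyShuffleVectorInst(SVI->getOperand(0),
                                           SVI->getOperand(1), SVI->getMask(),
                                           SVI->getType(), DL)) {
    SVI->replaceAllUsesWith(V);
    SVI->eraseFromParent();
  }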
diff --git a/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/include/llvm/Analysis/LazyBlockFrequencyInfo.h
index 5a02b9dce463..71ce0842f6a9 100644
--- a/include/llvm/Analysis/LazyBlockFrequencyInfo.h
+++ b/include/llvm/Analysis/LazyBlockFrequencyInfo.h
@@ -9,7 +9,7 @@
//
// This is an alternative analysis pass to BlockFrequencyInfoWrapperPass. The
// difference is that with this pass the block frequencies are not computed when
-// the analysis pass is executed but rather when the BFI results is explicitly
+// the analysis pass is executed but rather when the BFI result is explicitly
// requested by the analysis client.
//
//===----------------------------------------------------------------------===//
@@ -27,10 +27,58 @@ class BranchProbabilityInfo;
class Function;
class LoopInfo;
+/// Wraps a BFI to allow lazy computation of the block frequencies.
+///
+/// A pass that only conditionally uses BFI can unconditionally require the
+/// analysis without paying for the overhead if BFI doesn't end up being used.
+template <typename FunctionT, typename BranchProbabilityInfoPassT,
+ typename LoopInfoT, typename BlockFrequencyInfoT>
+class LazyBlockFrequencyInfo {
+public:
+ LazyBlockFrequencyInfo()
+ : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {}
+
+ /// Set up the per-function input.
+ void setAnalysis(const FunctionT *F, BranchProbabilityInfoPassT *BPIPass,
+ const LoopInfoT *LI) {
+ this->F = F;
+ this->BPIPass = BPIPass;
+ this->LI = LI;
+ }
+
+ /// Retrieve the BFI with the block frequencies computed.
+ BlockFrequencyInfoT &getCalculated() {
+ if (!Calculated) {
+ assert(F && BPIPass && LI && "call setAnalysis");
+ BFI.calculate(
+ *F, BPIPassTrait<BranchProbabilityInfoPassT>::getBPI(BPIPass), *LI);
+ Calculated = true;
+ }
+ return BFI;
+ }
+
+ const BlockFrequencyInfoT &getCalculated() const {
+ return const_cast<LazyBlockFrequencyInfo *>(this)->getCalculated();
+ }
+
+ void releaseMemory() {
+ BFI.releaseMemory();
+ Calculated = false;
+ setAnalysis(nullptr, nullptr, nullptr);
+ }
+
+private:
+ BlockFrequencyInfoT BFI;
+ bool Calculated;
+ const FunctionT *F;
+ BranchProbabilityInfoPassT *BPIPass;
+ const LoopInfoT *LI;
+};
+
/// \brief This is an alternative analysis pass to
/// BlockFrequencyInfoWrapperPass. The difference is that with this pass the
/// block frequencies are not computed when the analysis pass is executed but
-/// rather when the BFI results is explicitly requested by the analysis client.
+/// rather when the BFI result is explicitly requested by the analysis client.
///
/// There are some additional requirements for any client pass that wants to use
/// the analysis:
@@ -49,54 +97,12 @@ class LoopInfo;
///
/// Note that it is expected that we wouldn't need this functionality for the
/// new PM since with the new PM, analyses are executed on demand.
-class LazyBlockFrequencyInfoPass : public FunctionPass {
-
- /// Wraps a BFI to allow lazy computation of the block frequencies.
- ///
- /// A pass that only conditionally uses BFI can uncondtionally require the
- /// analysis without paying for the overhead if BFI doesn't end up being used.
- class LazyBlockFrequencyInfo {
- public:
- LazyBlockFrequencyInfo()
- : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {}
-
- /// Set up the per-function input.
- void setAnalysis(const Function *F, LazyBranchProbabilityInfoPass *BPIPass,
- const LoopInfo *LI) {
- this->F = F;
- this->BPIPass = BPIPass;
- this->LI = LI;
- }
- /// Retrieve the BFI with the block frequencies computed.
- BlockFrequencyInfo &getCalculated() {
- if (!Calculated) {
- assert(F && BPIPass && LI && "call setAnalysis");
- BFI.calculate(*F, BPIPass->getBPI(), *LI);
- Calculated = true;
- }
- return BFI;
- }
-
- const BlockFrequencyInfo &getCalculated() const {
- return const_cast<LazyBlockFrequencyInfo *>(this)->getCalculated();
- }
-
- void releaseMemory() {
- BFI.releaseMemory();
- Calculated = false;
- setAnalysis(nullptr, nullptr, nullptr);
- }
-
- private:
- BlockFrequencyInfo BFI;
- bool Calculated;
- const Function *F;
- LazyBranchProbabilityInfoPass *BPIPass;
- const LoopInfo *LI;
- };
-
- LazyBlockFrequencyInfo LBFI;
+class LazyBlockFrequencyInfoPass : public FunctionPass {
+private:
+ LazyBlockFrequencyInfo<Function, LazyBranchProbabilityInfoPass, LoopInfo,
+ BlockFrequencyInfo>
+ LBFI;
public:
static char ID;
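A sketch of how a client of this pass uses the now-templated wrapper, inside a legacy runOnFunction (assuming the pass exposes the usual getBFI() accessor that forwards to LBFI.getCalculated()):

  // The frequencies are only computed if this branch is reached, so an
  // unconditional analysis dependence costs nothing on the common path.
  if (OptimizeForRareCase) { // hypothetical condition
    BlockFrequencyInfo &BFI =
        getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
    uint64_t EntryFreq =
        BFI.getBlockFreq(&F.getEntryBlock()).getFrequency();
    // ... use EntryFreq ...
  }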
diff --git a/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
index c76fa1e819ae..067d7ebfd1f5 100644
--- a/include/llvm/Analysis/LazyBranchProbabilityInfo.h
+++ b/include/llvm/Analysis/LazyBranchProbabilityInfo.h
@@ -105,5 +105,17 @@ public:
/// \brief Helper for client passes to initialize dependent passes for LBPI.
void initializeLazyBPIPassPass(PassRegistry &Registry);
+
+/// \brief Simple trait class that provides a mapping between BPI passes and the
+/// corresponding BPInfo.
+template <typename PassT> struct BPIPassTrait {
+ static PassT &getBPI(PassT *P) { return *P; }
+};
+
+template <> struct BPIPassTrait<LazyBranchProbabilityInfoPass> {
+ static BranchProbabilityInfo &getBPI(LazyBranchProbabilityInfoPass *P) {
+ return P->getBPI();
+ }
+};
}
#endif
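A small sketch of what the trait buys: generic code (like the LazyBlockFrequencyInfo template above) can obtain branch probabilities from either kind of provider without special-casing:

  // Generic case: the "pass" is the BPI-like object itself and getBPI
  // just dereferences it. For LazyBranchProbabilityInfoPass, the
  // specialization calls P->getBPI(), computing the result on first use.
  template <typename BPIPassT>
  void useBPI(BPIPassT *P) {
    auto &BPI = BPIPassTrait<BPIPassT>::getBPI(P);
    (void)BPI; // ... query branch probabilities ...
  }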
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index bca0aebe2eef..ad7f5c80549f 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -106,6 +106,7 @@ class raw_ostream;
class LazyCallGraph {
public:
class Node;
+ class EdgeSequence;
class SCC;
class RefSCC;
class edge_iterator;
@@ -121,16 +122,6 @@ public:
/// inherently reference edges, and so the reference graph forms a superset
/// of the formal call graph.
///
- /// Furthermore, edges also may point to raw \c Function objects when those
- /// functions have not been scanned and incorporated into the graph (yet).
- /// This is one of the primary ways in which the graph can be lazy. When
- /// functions are scanned and fully incorporated into the graph, all of the
- /// edges referencing them are updated to point to the graph \c Node objects
- /// instead of to the raw \c Function objects. This class even provides
- /// methods to trigger this scan on-demand by attempting to get the target
- /// node of the graph and providing a reference back to the graph in order to
- /// lazily build it if necessary.
- ///
/// All of these forms of edges are fundamentally represented as outgoing
/// edges. The edges are stored in the source node and point at the target
/// node. This allows the edge structure itself to be a very compact data
@@ -141,7 +132,6 @@ public:
enum Kind : bool { Ref = false, Call = true };
Edge();
- explicit Edge(Function &F, Kind K);
explicit Edge(Node &N, Kind K);
/// Test whether the edge is null.
@@ -158,197 +148,251 @@ public:
/// This requires that the edge is not null.
bool isCall() const;
- /// Get the function referenced by this edge.
- ///
- /// This requires that the edge is not null, but will succeed whether we
- /// have built a graph node for the function yet or not.
- Function &getFunction() const;
-
- /// Get the call graph node referenced by this edge if one exists.
+ /// Get the call graph node referenced by this edge.
///
- /// This requires that the edge is not null. If we have built a graph node
- /// for the function this edge points to, this will return that node,
- /// otherwise it will return null.
- Node *getNode() const;
+ /// This requires that the edge is not null.
+ Node &getNode() const;
- /// Get the call graph node for this edge, building it if necessary.
+ /// Get the function referenced by this edge.
///
- /// This requires that the edge is not null. If we have not yet built
- /// a graph node for the function this edge points to, this will first ask
- /// the graph to build that node, inserting it into all the relevant
- /// structures.
- Node &getNode(LazyCallGraph &G);
+ /// This requires that the edge is not null.
+ Function &getFunction() const;
private:
- friend class LazyCallGraph::Node;
+ friend class LazyCallGraph::EdgeSequence;
friend class LazyCallGraph::RefSCC;
- PointerIntPair<PointerUnion<Function *, Node *>, 1, Kind> Value;
+ PointerIntPair<Node *, 1, Kind> Value;
void setKind(Kind K) { Value.setInt(K); }
};
- typedef SmallVector<Edge, 4> EdgeVectorT;
- typedef SmallVectorImpl<Edge> EdgeVectorImplT;
-
- /// A node in the call graph.
+ /// The edge sequence object.
///
- /// This represents a single node. It's primary roles are to cache the list of
- /// callees, de-duplicate and provide fast testing of whether a function is
- /// a callee, and facilitate iteration of child nodes in the graph.
- class Node {
+ /// This typically exists entirely within the node but is exposed as
+ /// a separate type because a node doesn't initially have edges. An explicit
+ /// population step is required to produce this sequence at first and it is
+ /// then cached in the node. It is also used to represent edges entering the
+ /// graph from outside the module to model the graph's roots.
+ ///
+ /// The sequence itself is both iterable and indexable. The indexes remain
+ /// stable even as the sequence mutates (including removal).
+ class EdgeSequence {
friend class LazyCallGraph;
- friend class LazyCallGraph::SCC;
+ friend class LazyCallGraph::Node;
friend class LazyCallGraph::RefSCC;
- LazyCallGraph *G;
- Function &F;
+ typedef SmallVector<Edge, 4> VectorT;
+ typedef SmallVectorImpl<Edge> VectorImplT;
- // We provide for the DFS numbering and Tarjan walk lowlink numbers to be
- // stored directly within the node. These are both '-1' when nodes are part
- // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk.
- int DFSNumber;
- int LowLink;
+ public:
+ /// An iterator used for the edges to both entry nodes and child nodes.
+ class iterator
+ : public iterator_adaptor_base<iterator, VectorImplT::iterator,
+ std::forward_iterator_tag> {
+ friend class LazyCallGraph;
+ friend class LazyCallGraph::Node;
+
+ VectorImplT::iterator E;
+
+ // Build the iterator for a specific position in the edge list.
+ iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E)
+ : iterator_adaptor_base(BaseI), E(E) {
+ while (I != E && !*I)
+ ++I;
+ }
- mutable EdgeVectorT Edges;
- DenseMap<Function *, int> EdgeIndexMap;
+ public:
+ iterator() {}
- /// Basic constructor implements the scanning of F into Edges and
- /// EdgeIndexMap.
- Node(LazyCallGraph &G, Function &F);
+ using iterator_adaptor_base::operator++;
+ iterator &operator++() {
+ do {
+ ++I;
+ } while (I != E && !*I);
+ return *this;
+ }
+ };
- /// Internal helper to insert an edge to a function.
- void insertEdgeInternal(Function &ChildF, Edge::Kind EK);
+ /// An iterator over specifically call edges.
+ ///
+ /// This has the same iteration properties as the \c iterator, but
+ /// restricts itself to edges which represent actual calls.
+ class call_iterator
+ : public iterator_adaptor_base<call_iterator, VectorImplT::iterator,
+ std::forward_iterator_tag> {
+ friend class LazyCallGraph;
+ friend class LazyCallGraph::Node;
+
+ VectorImplT::iterator E;
+
+ /// Advance the iterator to the next valid, call edge.
+ void advanceToNextEdge() {
+ while (I != E && (!*I || !I->isCall()))
+ ++I;
+ }
- /// Internal helper to insert an edge to a node.
- void insertEdgeInternal(Node &ChildN, Edge::Kind EK);
+ // Build the iterator for a specific position in the edge list.
+ call_iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E)
+ : iterator_adaptor_base(BaseI), E(E) {
+ advanceToNextEdge();
+ }
- /// Internal helper to change an edge kind.
- void setEdgeKind(Function &ChildF, Edge::Kind EK);
+ public:
+ call_iterator() {}
- /// Internal helper to remove the edge to the given function.
- void removeEdgeInternal(Function &ChildF);
+ using iterator_adaptor_base::operator++;
+ call_iterator &operator++() {
+ ++I;
+ advanceToNextEdge();
+ return *this;
+ }
+ };
- void clear() {
- Edges.clear();
- EdgeIndexMap.clear();
- }
+ iterator begin() { return iterator(Edges.begin(), Edges.end()); }
+ iterator end() { return iterator(Edges.end(), Edges.end()); }
- /// Print the name of this node's function.
- friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) {
- return OS << N.F.getName();
+ Edge &operator[](int i) { return Edges[i]; }
+ Edge &operator[](Node &N) {
+ assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!");
+ return Edges[EdgeIndexMap.find(&N)->second];
}
-
- /// Dump the name of this node's function to stderr.
- void dump() const;
-
- public:
- LazyCallGraph &getGraph() const { return *G; }
-
- Function &getFunction() const { return F; }
-
- edge_iterator begin() const {
- return edge_iterator(Edges.begin(), Edges.end());
+ Edge *lookup(Node &N) {
+ auto EI = EdgeIndexMap.find(&N);
+ return EI != EdgeIndexMap.end() ? &Edges[EI->second] : nullptr;
}
- edge_iterator end() const { return edge_iterator(Edges.end(), Edges.end()); }
- const Edge &operator[](int i) const { return Edges[i]; }
- const Edge &operator[](Function &F) const {
- assert(EdgeIndexMap.find(&F) != EdgeIndexMap.end() && "No such edge!");
- return Edges[EdgeIndexMap.find(&F)->second];
+ call_iterator call_begin() {
+ return call_iterator(Edges.begin(), Edges.end());
}
- const Edge &operator[](Node &N) const { return (*this)[N.getFunction()]; }
+ call_iterator call_end() { return call_iterator(Edges.end(), Edges.end()); }
- const Edge *lookup(Function &F) const {
- auto EI = EdgeIndexMap.find(&F);
- return EI != EdgeIndexMap.end() ? &Edges[EI->second] : nullptr;
+ iterator_range<call_iterator> calls() {
+ return make_range(call_begin(), call_end());
}
- call_edge_iterator call_begin() const {
- return call_edge_iterator(Edges.begin(), Edges.end());
- }
- call_edge_iterator call_end() const {
- return call_edge_iterator(Edges.end(), Edges.end());
- }
+ bool empty() {
+ for (auto &E : Edges)
+ if (E)
+ return false;
- iterator_range<call_edge_iterator> calls() const {
- return make_range(call_begin(), call_end());
+ return true;
}
- /// Equality is defined as address equality.
- bool operator==(const Node &N) const { return this == &N; }
- bool operator!=(const Node &N) const { return !operator==(N); }
- };
+ private:
+ VectorT Edges;
+ DenseMap<Node *, int> EdgeIndexMap;
- /// A lazy iterator used for both the entry nodes and child nodes.
- ///
- /// When this iterator is dereferenced, if not yet available, a function will
- /// be scanned for "calls" or uses of functions and its child information
- /// will be constructed. All of these results are accumulated and cached in
- /// the graph.
- class edge_iterator
- : public iterator_adaptor_base<edge_iterator, EdgeVectorImplT::iterator,
- std::forward_iterator_tag> {
- friend class LazyCallGraph;
- friend class LazyCallGraph::Node;
+ EdgeSequence() = default;
- EdgeVectorImplT::iterator E;
+ /// Internal helper to insert an edge to a node.
+ void insertEdgeInternal(Node &ChildN, Edge::Kind EK);
- // Build the iterator for a specific position in the edge list.
- edge_iterator(EdgeVectorImplT::iterator BaseI,
- EdgeVectorImplT::iterator E)
- : iterator_adaptor_base(BaseI), E(E) {
- while (I != E && !*I)
- ++I;
- }
+ /// Internal helper to change an edge kind.
+ void setEdgeKind(Node &ChildN, Edge::Kind EK);
- public:
- edge_iterator() {}
+ /// Internal helper to remove the edge to the given function.
+ bool removeEdgeInternal(Node &ChildN);
- using iterator_adaptor_base::operator++;
- edge_iterator &operator++() {
- do {
- ++I;
- } while (I != E && !*I);
- return *this;
- }
+ /// Internal helper to replace an edge key with a new one.
+ ///
+ /// This should be used when the function for a particular node in the
+ /// graph gets replaced and we are updating all of the edges to that node
+ /// to use the new function as the key.
+ void replaceEdgeKey(Function &OldTarget, Function &NewTarget);
};
- /// A lazy iterator over specifically call edges.
+ /// A node in the call graph.
+ ///
+ /// This represents a single node. Its primary roles are to cache the list of
+ /// callees, de-duplicate and provide fast testing of whether a function is
+ /// a callee, and facilitate iteration of child nodes in the graph.
///
- /// This has the same iteration properties as the \c edge_iterator, but
- /// restricts itself to edges which represent actual calls.
- class call_edge_iterator
- : public iterator_adaptor_base<call_edge_iterator,
- EdgeVectorImplT::iterator,
- std::forward_iterator_tag> {
+ /// The node works much like an optional in order to lazily populate the
+ /// edges of each node. Until populated, there are no edges. Once populated,
+ /// you can access the edges by dereferencing the node or using the `->`
+ /// operator as if the node was an `Optional<EdgeSequence>`.
+ class Node {
friend class LazyCallGraph;
- friend class LazyCallGraph::Node;
+ friend class LazyCallGraph::RefSCC;
- EdgeVectorImplT::iterator E;
+ public:
+ LazyCallGraph &getGraph() const { return *G; }
- /// Advance the iterator to the next valid, call edge.
- void advanceToNextEdge() {
- while (I != E && (!*I || !I->isCall()))
- ++I;
+ Function &getFunction() const { return *F; }
+
+ StringRef getName() const { return F->getName(); }
+
+ /// Equality is defined as address equality.
+ bool operator==(const Node &N) const { return this == &N; }
+ bool operator!=(const Node &N) const { return !operator==(N); }
+
+ /// Tests whether the node has been populated with edges.
+ operator bool() const { return Edges.hasValue(); }
+
+ // We allow accessing the edges by dereferencing or using the arrow
+ // operator, essentially wrapping the internal optional.
+ EdgeSequence &operator*() const {
+ // Rip const off because the node itself isn't changing here.
+ return const_cast<EdgeSequence &>(*Edges);
}
+ EdgeSequence *operator->() const { return &**this; }
- // Build the iterator for a specific position in the edge list.
- call_edge_iterator(EdgeVectorImplT::iterator BaseI,
- EdgeVectorImplT::iterator E)
- : iterator_adaptor_base(BaseI), E(E) {
- advanceToNextEdge();
+ /// Populate the edges of this node if necessary.
+ ///
+ /// The first time this is called it will populate the edges for this node
+ /// in the graph. It does this by scanning the underlying function, so once
+ /// this is done, any changes to that function must be explicitly reflected
+ /// in updates to the graph.
+ ///
+ /// \returns the populated \c EdgeSequence to simplify walking it.
+ ///
+ /// This will not update or re-scan anything if called repeatedly. Instead,
+ /// the edge sequence is cached and returned immediately on subsequent
+ /// calls.
+ EdgeSequence &populate() {
+ if (Edges)
+ return *Edges;
+
+ return populateSlow();
}
- public:
- call_edge_iterator() {}
+ private:
+ LazyCallGraph *G;
+ Function *F;
- using iterator_adaptor_base::operator++;
- call_edge_iterator &operator++() {
- ++I;
- advanceToNextEdge();
- return *this;
+ // We provide for the DFS numbering and Tarjan walk lowlink numbers to be
+ // stored directly within the node. These are both '-1' when nodes are part
+ // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk.
+ int DFSNumber;
+ int LowLink;
+
+ Optional<EdgeSequence> Edges;
+
+ /// Basic constructor implements the scanning of F into Edges and
+ /// EdgeIndexMap.
+ Node(LazyCallGraph &G, Function &F)
+ : G(&G), F(&F), DFSNumber(0), LowLink(0) {}
+
+ /// Implementation of the scan when populating.
+ EdgeSequence &populateSlow();
+
+ /// Internal helper to directly replace the function with a new one.
+ ///
+ /// This is used to facilitate transformations which need to replace the
+ /// formal Function object but directly move the body and users from one to
+ /// the other.
+ void replaceFunction(Function &NewF);
+
+ void clear() { Edges.reset(); }
+
+ /// Print the name of this node's function.
+ friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) {
+ return OS << N.F->getName();
}
+
+ /// Dump the name of this node's function to stderr.
+ void dump() const;
};
/// An SCC of the call graph.
@@ -789,19 +833,26 @@ public:
/// already existing edges.
void insertTrivialRefEdge(Node &SourceN, Node &TargetN);
+ /// Directly replace a node's function with a new function.
+ ///
+ /// This should be used when moving the body and users of a function to
+ /// a new formal function object but not otherwise changing the call graph
+ /// structure in any way.
+ ///
+ /// It requires that the old function in the provided node have zero uses
+ /// and the new function must have calls and references to it establishing
+ /// an equivalent graph.
+ void replaceNodeFunction(Node &N, Function &NewF);
+
///@}
};
/// A post-order depth-first RefSCC iterator over the call graph.
///
- /// This iterator triggers the Tarjan DFS-based formation of the RefSCC (and
- /// SCC) DAG for the call graph, walking it lazily in depth-first post-order.
- /// That is, it always visits RefSCCs for the target of a reference edge
- /// prior to visiting the RefSCC for a source of the edge (when they are in
- /// different RefSCCs).
- ///
- /// When forming each RefSCC, the call edges within it are used to form SCCs
- /// within it, so iterating this also controls the lazy formation of SCCs.
+ /// This iterator walks the cached post-order sequence of RefSCCs. However,
+ /// it trades stability for flexibility. It is restricted to a forward
+ /// iterator but will survive mutations which insert new RefSCCs and continue
+ /// to point to the same RefSCC even if it moves in the post-order sequence.
class postorder_ref_scc_iterator
: public iterator_facade_base<postorder_ref_scc_iterator,
std::forward_iterator_tag, RefSCC> {
@@ -825,12 +876,9 @@ public:
/// populating it if necessary.
static RefSCC *getRC(LazyCallGraph &G, int Index) {
if (Index == (int)G.PostOrderRefSCCs.size())
- if (!G.buildNextRefSCCInPostOrder())
- // We're at the end.
- return nullptr;
+ // We're at the end.
+ return nullptr;
- assert(Index < (int)G.PostOrderRefSCCs.size() &&
- "Built the next post-order RefSCC without growing list!");
return G.PostOrderRefSCCs[Index];
}
@@ -859,17 +907,21 @@ public:
LazyCallGraph(LazyCallGraph &&G);
LazyCallGraph &operator=(LazyCallGraph &&RHS);
- edge_iterator begin() {
- return edge_iterator(EntryEdges.begin(), EntryEdges.end());
- }
- edge_iterator end() {
- return edge_iterator(EntryEdges.end(), EntryEdges.end());
- }
+ EdgeSequence::iterator begin() { return EntryEdges.begin(); }
+ EdgeSequence::iterator end() { return EntryEdges.end(); }
+
+ void buildRefSCCs();
postorder_ref_scc_iterator postorder_ref_scc_begin() {
+ if (!EntryEdges.empty())
+ assert(!PostOrderRefSCCs.empty() &&
+ "Must form RefSCCs before iterating them!");
return postorder_ref_scc_iterator(*this);
}
postorder_ref_scc_iterator postorder_ref_scc_end() {
+ if (!EntryEdges.empty())
+ assert(!PostOrderRefSCCs.empty() &&
+ "Must form RefSCCs before iterating them!");
return postorder_ref_scc_iterator(*this,
postorder_ref_scc_iterator::IsAtEndT());
}
@@ -920,19 +972,19 @@ public:
/// below.
/// Update the call graph after inserting a new edge.
- void insertEdge(Node &Caller, Function &Callee, Edge::Kind EK);
+ void insertEdge(Node &SourceN, Node &TargetN, Edge::Kind EK);
/// Update the call graph after inserting a new edge.
- void insertEdge(Function &Caller, Function &Callee, Edge::Kind EK) {
- return insertEdge(get(Caller), Callee, EK);
+ void insertEdge(Function &Source, Function &Target, Edge::Kind EK) {
+ return insertEdge(get(Source), get(Target), EK);
}
/// Update the call graph after deleting an edge.
- void removeEdge(Node &Caller, Function &Callee);
+ void removeEdge(Node &SourceN, Node &TargetN);
/// Update the call graph after deleting an edge.
- void removeEdge(Function &Caller, Function &Callee) {
- return removeEdge(get(Caller), Callee);
+ void removeEdge(Function &Source, Function &Target) {
+ return removeEdge(get(Source), get(Target));
}
///@}
@@ -1013,14 +1065,11 @@ private:
/// Maps function->node for fast lookup.
DenseMap<const Function *, Node *> NodeMap;
- /// The entry nodes to the graph.
+ /// The entry edges into the graph.
///
- /// These nodes are reachable through "external" means. Put another way, they
+ /// These edges are from "external" sources. Put another way, they
/// escape at the module scope.
- EdgeVectorT EntryEdges;
-
- /// Map of the entry nodes in the graph to their indices in \c EntryEdges.
- DenseMap<Function *, int> EntryIndexMap;
+ EdgeSequence EntryEdges;
/// Allocator that holds all the call graph SCCs.
SpecificBumpPtrAllocator<SCC> SCCBPA;
@@ -1045,18 +1094,6 @@ private:
/// These are all of the RefSCCs which have no children.
SmallVector<RefSCC *, 4> LeafRefSCCs;
- /// Stack of nodes in the DFS walk.
- SmallVector<std::pair<Node *, edge_iterator>, 4> DFSStack;
-
- /// Set of entry nodes not-yet-processed into RefSCCs.
- SmallVector<Function *, 4> RefSCCEntryNodes;
-
- /// Stack of nodes the DFS has walked but not yet put into a RefSCC.
- SmallVector<Node *, 4> PendingRefSCCStack;
-
- /// Counter for the next DFS number to assign.
- int NextDFSNumber;
-
/// Helper to insert a new function, with an already looked-up entry in
/// the NodeMap.
Node &insertInto(Function &F, Node *&MappedN);
@@ -1078,6 +1115,23 @@ private:
return new (RefSCCBPA.Allocate()) RefSCC(std::forward<Ts>(Args)...);
}
+ /// Common logic for building SCCs from a sequence of roots.
+ ///
+ /// This is a very generic implementation of the depth-first walk and SCC
+ /// formation algorithm. It uses a generic sequence of roots and generic
+ /// callbacks for each step. This is designed to be used to implement both
+ /// the RefSCC formation and SCC formation with shared logic.
+ ///
+ /// Currently this is a relatively naive implementation of Tarjan's DFS
+ /// algorithm to form the SCCs.
+ ///
+ /// FIXME: We should consider newer variants such as Nuutila.
+ template <typename RootsT, typename GetBeginT, typename GetEndT,
+ typename GetNodeT, typename FormSCCCallbackT>
+ static void buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
+ GetEndT &&GetEnd, GetNodeT &&GetNode,
+ FormSCCCallbackT &&FormSCC);
+
/// Build the SCCs for a RefSCC out of a list of nodes.
void buildSCCs(RefSCC &RC, node_stack_range Nodes);
@@ -1098,22 +1152,12 @@ private:
"Index does not point back at RC!");
return IndexIt->second;
}
-
- /// Builds the next node in the post-order RefSCC walk of the call graph and
- /// appends it to the \c PostOrderRefSCCs vector.
- ///
- /// Returns true if a new RefSCC was successfully constructed, and false if
- /// there are no more RefSCCs to build in the graph.
- bool buildNextRefSCCInPostOrder();
};
inline LazyCallGraph::Edge::Edge() : Value() {}
-inline LazyCallGraph::Edge::Edge(Function &F, Kind K) : Value(&F, K) {}
inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {}
-inline LazyCallGraph::Edge::operator bool() const {
- return !Value.getPointer().isNull();
-}
+inline LazyCallGraph::Edge::operator bool() const { return Value.getPointer(); }
inline LazyCallGraph::Edge::Kind LazyCallGraph::Edge::getKind() const {
assert(*this && "Queried a null edge!");
@@ -1125,51 +1169,32 @@ inline bool LazyCallGraph::Edge::isCall() const {
return getKind() == Call;
}
-inline Function &LazyCallGraph::Edge::getFunction() const {
+inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode() const {
assert(*this && "Queried a null edge!");
- auto P = Value.getPointer();
- if (auto *F = P.dyn_cast<Function *>())
- return *F;
-
- return P.get<Node *>()->getFunction();
+ return *Value.getPointer();
}
-inline LazyCallGraph::Node *LazyCallGraph::Edge::getNode() const {
- assert(*this && "Queried a null edge!");
- auto P = Value.getPointer();
- if (auto *N = P.dyn_cast<Node *>())
- return N;
-
- return nullptr;
-}
-
-inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode(LazyCallGraph &G) {
+inline Function &LazyCallGraph::Edge::getFunction() const {
assert(*this && "Queried a null edge!");
- auto P = Value.getPointer();
- if (auto *N = P.dyn_cast<Node *>())
- return *N;
-
- Node &N = G.get(*P.get<Function *>());
- Value.setPointer(&N);
- return N;
+ return getNode().getFunction();
}
// Provide GraphTraits specializations for call graphs.
template <> struct GraphTraits<LazyCallGraph::Node *> {
typedef LazyCallGraph::Node *NodeRef;
- typedef LazyCallGraph::edge_iterator ChildIteratorType;
+ typedef LazyCallGraph::EdgeSequence::iterator ChildIteratorType;
static NodeRef getEntryNode(NodeRef N) { return N; }
- static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->end(); }
+ static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); }
};
template <> struct GraphTraits<LazyCallGraph *> {
typedef LazyCallGraph::Node *NodeRef;
- typedef LazyCallGraph::edge_iterator ChildIteratorType;
+ typedef LazyCallGraph::EdgeSequence::iterator ChildIteratorType;
static NodeRef getEntryNode(NodeRef N) { return N; }
- static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->end(); }
+ static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); }
};
/// An analysis pass which computes the call graph for a module.
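With this restructuring, a typical traversal now populates a node explicitly and walks its EdgeSequence. A minimal sketch, where N is a LazyCallGraph::Node & and visitRef/visitCall are illustrative callbacks:

  // First touch: scan the underlying function and cache the edges.
  LazyCallGraph::EdgeSequence &Edges = N.populate();
  for (LazyCallGraph::Edge &E : Edges) {
    // Every edge, call or ref, now points at a real Node (never a bare
    // Function), so no lazy getNode(G) step is needed.
    LazyCallGraph::Node &Target = E.getNode();
    visitRef(Target.getFunction());
  }
  for (LazyCallGraph::Edge &E : Edges.calls())
    visitCall(E.getNode().getFunction()); // only the call edges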
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index 610791023a7d..49e088e533dc 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -32,6 +32,7 @@ namespace llvm {
class LazyValueInfo {
friend class LazyValueInfoWrapperPass;
AssumptionCache *AC = nullptr;
+ const DataLayout *DL = nullptr;
class TargetLibraryInfo *TLI = nullptr;
DominatorTree *DT = nullptr;
void *PImpl = nullptr;
@@ -40,16 +41,17 @@ class LazyValueInfo {
public:
~LazyValueInfo();
LazyValueInfo() {}
- LazyValueInfo(AssumptionCache *AC_, TargetLibraryInfo *TLI_,
+ LazyValueInfo(AssumptionCache *AC_, const DataLayout *DL_, TargetLibraryInfo *TLI_,
DominatorTree *DT_)
- : AC(AC_), TLI(TLI_), DT(DT_) {}
+ : AC(AC_), DL(DL_), TLI(TLI_), DT(DT_) {}
LazyValueInfo(LazyValueInfo &&Arg)
- : AC(Arg.AC), TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) {
+ : AC(Arg.AC), DL(Arg.DL), TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) {
Arg.PImpl = nullptr;
}
LazyValueInfo &operator=(LazyValueInfo &&Arg) {
releaseMemory();
AC = Arg.AC;
+ DL = Arg.DL;
TLI = Arg.TLI;
DT = Arg.DT;
PImpl = Arg.PImpl;
@@ -98,8 +100,15 @@ public:
/// Inform the analysis cache that we have erased a block.
void eraseBlock(BasicBlock *BB);
+ /// Print the \LazyValueInfoCache.
+ void printCache(Function &F, raw_ostream &OS);
+
// For old PM pass. Delete once LazyValueInfoWrapperPass is gone.
void releaseMemory();
+
+ /// Handle invalidation events in the new pass manager.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
};
/// \brief Analysis to compute lazy value information.
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index e167f36219d2..a59c1f88e229 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -85,8 +85,37 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan = DefMaxInstsToScan,
AliasAnalysis *AA = nullptr,
- bool *IsLoadCSE = nullptr);
+ bool *IsLoadCSE = nullptr,
+ unsigned *NumScanedInst = nullptr);
+/// Scan backwards to see if we have the value of the given pointer available
+/// locally within a small number of instructions.
+///
+/// You can use this function to scan across multiple blocks: after you call
+/// this function, if ScanFrom points at the beginning of the block, it's safe
+/// to continue scanning the predecessors.
+///
+/// \param Ptr The pointer we want the load and store to originate from.
+/// \param AccessTy The access type of the pointer.
+/// \param AtLeastAtomic Are we looking for an at-least-atomic load/store? If
+/// false, we can return an atomic or non-atomic load or store; if true, we
+/// must return an atomic load or store.
+/// \param ScanBB The basic block to scan.
+/// \param [in,out] ScanFrom The location to start scanning from. When this
+/// function returns, it points at the last instruction scanned.
+/// \param MaxInstsToScan The maximum number of instructions to scan. If this
+/// is zero, the whole block will be scanned.
+/// \param AA Optional pointer to alias analysis, to make the scan more
+/// precise.
+/// \param [out] IsLoad Whether the returned value is a load from the same
+/// location in memory, as opposed to the value operand of a store.
+///
+/// \returns The found value, or nullptr if no value is found.
+Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic,
+ BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan, AliasAnalysis *AA,
+ bool *IsLoad, unsigned *NumScanedInst);
}
#endif
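A sketch of calling the new entry point for a load, where Load is a LoadInst * and DefMaxInstsToScan is the cl::opt declared earlier in this header:

  // Scan backwards from the load within its own block; NumScanned lets
  // the caller resume across blocks against an instruction budget.
  BasicBlock::iterator ScanFrom = Load->getIterator();
  unsigned NumScanned = 0;
  Value *Avail = FindAvailablePtrLoadStore(
      Load->getPointerOperand(), Load->getType(), /*AtLeastAtomic=*/false,
      Load->getParent(), ScanFrom, DefMaxInstsToScan, /*AA=*/nullptr,
      /*IsLoad=*/nullptr, &NumScanned);
  if (Avail) {
    // ... reuse Avail instead of re-loading ...
  }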
diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h
index 901b193c7e2d..2568903c57f3 100644
--- a/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -38,39 +38,6 @@ class SCEVUnionPredicate;
class LoopAccessInfo;
class OptimizationRemarkEmitter;
-/// Optimization analysis message produced during vectorization. Messages inform
-/// the user why vectorization did not occur.
-class LoopAccessReport {
- std::string Message;
- const Instruction *Instr;
-
-protected:
- LoopAccessReport(const Twine &Message, const Instruction *I)
- : Message(Message.str()), Instr(I) {}
-
-public:
- LoopAccessReport(const Instruction *I = nullptr) : Instr(I) {}
-
- template <typename A> LoopAccessReport &operator<<(const A &Value) {
- raw_string_ostream Out(Message);
- Out << Value;
- return *this;
- }
-
- const Instruction *getInstr() const { return Instr; }
-
- std::string &str() { return Message; }
- const std::string &str() const { return Message; }
- operator Twine() { return Message; }
-
- /// \brief Emit an analysis note for \p PassName with the debug location from
- /// the instruction in \p Message if available. Otherwise use the location of
- /// \p TheLoop.
- static void emitAnalysis(const LoopAccessReport &Message, const Loop *TheLoop,
- const char *PassName,
- OptimizationRemarkEmitter &ORE);
-};
-
/// \brief Collection of parameters shared beetween the Loop Vectorizer and the
/// Loop Access Analysis.
struct VectorizerParams {
@@ -126,7 +93,7 @@ struct VectorizerParams {
class MemoryDepChecker {
public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+ typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
/// \brief Set of potential dependent memory accesses.
typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
@@ -221,7 +188,7 @@ public:
/// \brief Check whether the dependencies between the accesses are safe.
///
/// Only checks sets with elements in \p CheckDeps.
- bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps,
+ bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
const ValueToValueMap &Strides);
/// \brief No memory dependence was encountered that would inhibit
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 20e6af2727fe..996794b660a9 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -26,7 +26,7 @@
// * etc...
//
// Note that this analysis specifically identifies *Loops* not cycles or SCCs
-// in the CFG. There can be strongly connected compontents in the CFG which
+// in the CFG. There can be strongly connected components in the CFG which
// this analysis will not recognize and that will not be represented by a Loop
// instance. In particular, a Loop might be inside such a non-loop SCC, or a
// non-loop SCC might contain a sub-SCC which is a Loop.
@@ -364,7 +364,7 @@ extern template class LoopBase<BasicBlock, Loop>;
/// Represents a single loop in the control flow graph. Note that not all SCCs
-/// in the CFG are neccessarily loops.
+/// in the CFG are necessarily loops.
class Loop : public LoopBase<BasicBlock, Loop> {
public:
/// \brief A range representing the start and end location of a loop.
@@ -469,7 +469,7 @@ public:
/// the loop that branches to the loop header.
///
/// The LoopID metadata node should have one or more operands and the first
- /// operand should should be the node itself.
+ /// operand should be the node itself.
void setLoopID(MDNode *LoopID) const;
/// Return true if no exit block for the loop has a predecessor that is
@@ -478,7 +478,8 @@ public:
/// Return all unique successor blocks of this loop.
/// These are the blocks _outside of the current loop_ which are branched to.
- /// This assumes that loop exits are in canonical form.
+ /// This assumes that loop exits are in canonical form, i.e. all exits are
+ /// dedicated exits.
void getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const;
/// If getUniqueExitBlocks would return exactly one block, return that block.
@@ -570,6 +571,23 @@ public:
reverse_iterator rend() const { return TopLevelLoops.rend(); }
bool empty() const { return TopLevelLoops.empty(); }
+ /// Return all of the loops in the function in preorder across the loop
+ /// nests, with siblings in forward program order.
+ ///
+ /// Note that because loops form a forest of trees, preorder is equivalent to
+ /// reverse postorder.
+ SmallVector<LoopT *, 4> getLoopsInPreorder();
+
+ /// Return all of the loops in the function in preorder across the loop
+ /// nests, with siblings in *reverse* program order.
+ ///
+ /// Note that because loops form a forest of trees, preorder is equivalent to
+ /// reverse postorder.
+ ///
+ /// Also note that this is *not* a reverse preorder. Only the siblings are in
+ /// reverse program order.
+ SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder();
+
/// Return the inner most loop that BB lives in. If a basic block is in no
/// loop (for example the entry node), null is returned.
LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); }
@@ -682,6 +700,10 @@ public:
return *this;
}
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
+
// Most of the public interface is provided via LoopInfoBase.
/// Update LoopInfo after removing the last backedge from a loop. This updates
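A sketch of the intended use of the new preorder traversal, where LI is a LoopInfo & (the output is illustrative):

  // Outermost loops first, siblings in forward program order; the
  // returned vector is a snapshot, safe to hold while mutating loops.
  for (Loop *L : LI.getLoopsInPreorder())
    dbgs() << "loop at depth " << L->getLoopDepth() << ": "
           << L->getHeader()->getName() << "\n";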
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 833a2202a568..761f8721b54f 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -507,6 +507,55 @@ analyze(const DominatorTreeBase<BlockT> &DomTree) {
DFS.traverse(DomRoot->getBlock());
}
+template <class BlockT, class LoopT>
+SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() {
+ SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist;
+ // The outer-most loop actually goes into the result in the same relative
+ // order as we walk it. But LoopInfo stores the top level loops in reverse
+ // program order so for here we reverse it to get forward program order.
+ // FIXME: If we change the order of LoopInfo we will want to remove the
+ // reverse here.
+ for (LoopT *RootL : reverse(*this)) {
+ assert(PreOrderWorklist.empty() &&
+ "Must start with an empty preorder walk worklist.");
+ PreOrderWorklist.push_back(RootL);
+ do {
+ LoopT *L = PreOrderWorklist.pop_back_val();
+ // Sub-loops are stored in forward program order, but will process the
+ // worklist backwards so append them in reverse order.
+ PreOrderWorklist.append(L->rbegin(), L->rend());
+ PreOrderLoops.push_back(L);
+ } while (!PreOrderWorklist.empty());
+ }
+
+ return PreOrderLoops;
+}
+
+template <class BlockT, class LoopT>
+SmallVector<LoopT *, 4>
+LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() {
+ SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist;
+ // The outer-most loop actually goes into the result in the same relative
+ // order as we walk it. LoopInfo stores the top level loops in reverse
+ // program order, so we walk in the stored order here.
+ // FIXME: If we change the order of LoopInfo we will want to add a reverse
+ // here.
+ for (LoopT *RootL : *this) {
+ assert(PreOrderWorklist.empty() &&
+ "Must start with an empty preorder walk worklist.");
+ PreOrderWorklist.push_back(RootL);
+ do {
+ LoopT *L = PreOrderWorklist.pop_back_val();
+ // Sub-loops are stored in forward program order, but we will process the
+ // worklist backwards, so we can just append them in order.
+ PreOrderWorklist.append(L->begin(), L->end());
+ PreOrderLoops.push_back(L);
+ } while (!PreOrderWorklist.empty());
+ }
+
+ return PreOrderLoops;
+}
+
// Debugging
template<class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const {
@@ -528,15 +577,48 @@ bool compareVectors(std::vector<T> &BB1, std::vector<T> &BB2) {
}
template <class BlockT, class LoopT>
-static void
-addInnerLoopsToHeadersMap(DenseMap<BlockT *, const LoopT *> &LoopHeaders,
- const LoopInfoBase<BlockT, LoopT> &LI,
- const LoopT &L) {
+void addInnerLoopsToHeadersMap(DenseMap<BlockT *, const LoopT *> &LoopHeaders,
+ const LoopInfoBase<BlockT, LoopT> &LI,
+ const LoopT &L) {
LoopHeaders[L.getHeader()] = &L;
for (LoopT *SL : L)
addInnerLoopsToHeadersMap(LoopHeaders, LI, *SL);
}
+#ifndef NDEBUG
+template <class BlockT, class LoopT>
+static void compareLoops(const LoopT *L, const LoopT *OtherL,
+ DenseMap<BlockT *, const LoopT *> &OtherLoopHeaders) {
+ BlockT *H = L->getHeader();
+ BlockT *OtherH = OtherL->getHeader();
+ assert(H == OtherH &&
+ "Mismatched headers even though found in the same map entry!");
+
+ assert(L->getLoopDepth() == OtherL->getLoopDepth() &&
+ "Mismatched loop depth!");
+ const LoopT *ParentL = L, *OtherParentL = OtherL;
+ do {
+ assert(ParentL->getHeader() == OtherParentL->getHeader() &&
+ "Mismatched parent loop headers!");
+ ParentL = ParentL->getParentLoop();
+ OtherParentL = OtherParentL->getParentLoop();
+ } while (ParentL);
+
+ for (const LoopT *SubL : *L) {
+ BlockT *SubH = SubL->getHeader();
+ const LoopT *OtherSubL = OtherLoopHeaders.lookup(SubH);
+ assert(OtherSubL && "Inner loop is missing in computed loop info!");
+ OtherLoopHeaders.erase(SubH);
+ compareLoops(SubL, OtherSubL, OtherLoopHeaders);
+ }
+
+ std::vector<BlockT *> BBs = L->getBlocks();
+ std::vector<BlockT *> OtherBBs = OtherL->getBlocks();
+ assert(compareVectors(BBs, OtherBBs) &&
+ "Mismatched basic blocks in the loops!");
+}
+#endif
+
template <class BlockT, class LoopT>
void LoopInfoBase<BlockT, LoopT>::verify(
const DominatorTreeBase<BlockT> &DomTree) const {
@@ -559,42 +641,32 @@ void LoopInfoBase<BlockT, LoopT>::verify(
LoopInfoBase<BlockT, LoopT> OtherLI;
OtherLI.analyze(DomTree);
- DenseMap<BlockT *, const LoopT *> LoopHeaders1;
- DenseMap<BlockT *, const LoopT *> LoopHeaders2;
-
- for (LoopT *L : *this)
- addInnerLoopsToHeadersMap(LoopHeaders1, *this, *L);
+ // Build a map we can use to move from our LI to the computed one. This
+ // allows us to ignore the particular order in any layer of the loop forest
+ // while still comparing the structure.
+ DenseMap<BlockT *, const LoopT *> OtherLoopHeaders;
for (LoopT *L : OtherLI)
- addInnerLoopsToHeadersMap(LoopHeaders2, OtherLI, *L);
- assert(LoopHeaders1.size() == LoopHeaders2.size() &&
- "LoopInfo is incorrect.");
-
- auto compareLoops = [&](const LoopT *L1, const LoopT *L2) {
- BlockT *H1 = L1->getHeader();
- BlockT *H2 = L2->getHeader();
- if (H1 != H2)
- return false;
- std::vector<BlockT *> BB1 = L1->getBlocks();
- std::vector<BlockT *> BB2 = L2->getBlocks();
- if (!compareVectors(BB1, BB2))
- return false;
-
- std::vector<BlockT *> SubLoopHeaders1;
- std::vector<BlockT *> SubLoopHeaders2;
- for (LoopT *L : *L1)
- SubLoopHeaders1.push_back(L->getHeader());
- for (LoopT *L : *L2)
- SubLoopHeaders2.push_back(L->getHeader());
-
- if (!compareVectors(SubLoopHeaders1, SubLoopHeaders2))
- return false;
- return true;
- };
-
- for (auto &I : LoopHeaders1) {
- BlockT *H = I.first;
- bool LoopsMatch = compareLoops(LoopHeaders1[H], LoopHeaders2[H]);
- assert(LoopsMatch && "LoopInfo is incorrect.");
+ addInnerLoopsToHeadersMap(OtherLoopHeaders, OtherLI, *L);
+
+ // Walk the top level loops and ensure there is a corresponding top-level
+ // loop in the computed version and then recursively compare those loop
+ // nests.
+ for (LoopT *L : *this) {
+ BlockT *Header = L->getHeader();
+ const LoopT *OtherL = OtherLoopHeaders.lookup(Header);
+ assert(OtherL && "Top level loop is missing in computed loop info!");
+ // Now that we've matched this loop, erase its header from the map.
+ OtherLoopHeaders.erase(Header);
+ // And recursively compare these loops.
+ compareLoops(L, OtherL, OtherLoopHeaders);
+ }
+
+ // Any remaining entries in the map are loops which were found when computing
+ // a fresh LoopInfo but not present in the current one.
+ if (!OtherLoopHeaders.empty()) {
+ for (const auto &HeaderAndLoop : OtherLoopHeaders)
+ dbgs() << "Found new loop: " << *HeaderAndLoop.second << "\n";
+ llvm_unreachable("Found new loops when recomputing LoopInfo!");
}
#endif
}
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index b58f07e69475..c5514316f75f 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -32,12 +32,6 @@ class TargetLibraryInfo;
class Type;
class Value;
-enum class ObjSizeMode {
- Exact = 0,
- Min = 1,
- Max = 2
-};
-
/// \brief Tests if a value is a call or invoke to a library function that
/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
/// like).
@@ -129,17 +123,36 @@ static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) {
// Utility functions to compute size of objects.
//
+/// Various options to control the behavior of getObjectSize.
+struct ObjectSizeOpts {
+ /// Controls how we handle conditional statements with unknown conditions.
+ enum class Mode : uint8_t {
+ /// Fail to evaluate an unknown condition.
+ Exact,
+ /// Evaluate all branches of an unknown condition. If all evaluations
+ /// succeed, pick the minimum size.
+ Min,
+ /// Same as Min, except we pick the maximum size of all of the branches.
+ Max
+ };
+
+ /// How we want to evaluate this object's size.
+ Mode EvalMode = Mode::Exact;
+ /// Whether to round the result up to the alignment of allocas, byval
+ /// arguments, and global variables.
+ bool RoundToAlign = false;
+ /// If this is true, null pointers in address space 0 will be treated as
+ /// though they can't be evaluated. Otherwise, null is always considered to
+ /// point to a 0 byte region of memory.
+ bool NullIsUnknownSize = false;
+};
+
/// \brief Compute the size of the object pointed by Ptr. Returns true and the
/// object size in Size if successful, and false otherwise. In this context, by
/// object we mean the region of memory starting at Ptr to the end of the
/// underlying object pointed to by Ptr.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
-/// If Mode is Min or Max the size will be evaluated even if it depends on
-/// a condition and corresponding value will be returned (min or max).
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
- const TargetLibraryInfo *TLI, bool RoundToAlign = false,
- ObjSizeMode Mode = ObjSizeMode::Exact);
+ const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {});
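+
+// A minimal usage sketch (illustrative; assumes `Ptr`, `DL`, and `TLI` are in
+// scope): evaluate the maximum possible size across unknown branches.
+//   ObjectSizeOpts Opts;
+//   Opts.EvalMode = ObjectSizeOpts::Mode::Max;
+//   uint64_t Size;
+//   if (getObjectSize(Ptr, Size, DL, TLI, Opts))
+//     errs() << "object size upper bound: " << Size << "\n";
+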
/// Try to turn a call to @llvm.objectsize into an integer value of the given
/// Type. Returns null on failure.
@@ -160,8 +173,7 @@ class ObjectSizeOffsetVisitor
const DataLayout &DL;
const TargetLibraryInfo *TLI;
- bool RoundToAlign;
- ObjSizeMode Mode;
+ ObjectSizeOpts Options;
unsigned IntTyBits;
APInt Zero;
SmallPtrSet<Instruction *, 8> SeenInsts;
@@ -174,8 +186,7 @@ class ObjectSizeOffsetVisitor
public:
ObjectSizeOffsetVisitor(const DataLayout &DL, const TargetLibraryInfo *TLI,
- LLVMContext &Context, bool RoundToAlign = false,
- ObjSizeMode Mode = ObjSizeMode::Exact);
+ LLVMContext &Context, ObjectSizeOpts Options = {});
SizeOffsetType compute(Value *V);
diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h
new file mode 100644
index 000000000000..db31ae9f4f10
--- /dev/null
+++ b/include/llvm/Analysis/MemorySSA.h
@@ -0,0 +1,1155 @@
+//===- MemorySSA.h - Build Memory SSA ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file exposes an interface to building/using memory SSA to
+/// walk memory instructions using a use/def graph.
+///
+/// The MemorySSA class builds an SSA form that links together memory access
+/// instructions such as loads, stores, atomics, and calls. Additionally, it
+/// does a trivial form of "heap versioning": every time the memory state
+/// changes in the program, we generate a new heap version. It generates
+/// MemoryDef/Uses/Phis that are overlaid on top of the existing instructions.
+///
+/// As a trivial example,
+/// define i32 @main() #0 {
+/// entry:
+/// %call = call noalias i8* @_Znwm(i64 4) #2
+/// %0 = bitcast i8* %call to i32*
+/// %call1 = call noalias i8* @_Znwm(i64 4) #2
+/// %1 = bitcast i8* %call1 to i32*
+/// store i32 5, i32* %0, align 4
+/// store i32 7, i32* %1, align 4
+/// %2 = load i32* %0, align 4
+/// %3 = load i32* %1, align 4
+/// %add = add nsw i32 %2, %3
+/// ret i32 %add
+/// }
+///
+/// Will become
+/// define i32 @main() #0 {
+/// entry:
+/// ; 1 = MemoryDef(0)
+/// %call = call noalias i8* @_Znwm(i64 4) #3
+/// %2 = bitcast i8* %call to i32*
+/// ; 2 = MemoryDef(1)
+/// %call1 = call noalias i8* @_Znwm(i64 4) #3
+/// %4 = bitcast i8* %call1 to i32*
+/// ; 3 = MemoryDef(2)
+/// store i32 5, i32* %2, align 4
+/// ; 4 = MemoryDef(3)
+/// store i32 7, i32* %4, align 4
+/// ; MemoryUse(3)
+/// %7 = load i32* %2, align 4
+/// ; MemoryUse(4)
+/// %8 = load i32* %4, align 4
+/// %add = add nsw i32 %7, %8
+/// ret i32 %add
+/// }
+///
+/// Given this form, all the stores that could ever affect the load at %8 can
+/// be found by using the MemoryUse associated with it, and walking from use to
+/// def until you hit the top of the function.
+///
+/// Each def also has a list of users associated with it, so you can walk from
+/// both def to users, and users to defs. Note that we disambiguate MemoryUses,
+/// but not the RHS of MemoryDefs. You can see this above at %7, which would
+/// otherwise be a MemoryUse(4). Being disambiguated means that for a given
+/// store, all the MemoryUses on its use lists are may-aliases of that store
+/// (but the MemoryDefs on its use list may not be).
+///
+/// MemoryDefs are not disambiguated because it would require multiple reaching
+/// definitions, which would require multiple phis, and multiple memoryaccesses
+/// per instruction.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MEMORYSSA_H
+#define LLVM_ANALYSIS_MEMORYSSA_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <utility>
+
+namespace llvm {
+
+class Function;
+class Instruction;
+class MemoryAccess;
+class LLVMContext;
+class raw_ostream;
+namespace MSSAHelpers {
+struct AllAccessTag {};
+struct DefsOnlyTag {};
+}
+
+enum {
+ // Used to signify what the default invalid ID is for MemoryAccess's
+ // getID()
+ INVALID_MEMORYACCESS_ID = 0
+};
+
+template <class T> class memoryaccess_def_iterator_base;
+using memoryaccess_def_iterator = memoryaccess_def_iterator_base<MemoryAccess>;
+using const_memoryaccess_def_iterator =
+ memoryaccess_def_iterator_base<const MemoryAccess>;
+
+/// \brief The base for all memory accesses. All memory accesses in a block are
+/// linked together using an intrusive list.
+class MemoryAccess
+ : public User,
+ public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>,
+ public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>> {
+public:
+ using AllAccessType =
+ ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>;
+ using DefsOnlyType =
+ ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>;
+
+ // Methods for support type inquiry through isa, cast, and
+ // dyn_cast
+ static inline bool classof(const Value *V) {
+ unsigned ID = V->getValueID();
+ return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal;
+ }
+
+ MemoryAccess(const MemoryAccess &) = delete;
+ MemoryAccess &operator=(const MemoryAccess &) = delete;
+ ~MemoryAccess() override;
+
+ void *operator new(size_t, unsigned) = delete;
+ void *operator new(size_t) = delete;
+
+ BasicBlock *getBlock() const { return Block; }
+
+ virtual void print(raw_ostream &OS) const = 0;
+ virtual void dump() const;
+
+ /// \brief The user iterators for a memory access
+ typedef user_iterator iterator;
+ typedef const_user_iterator const_iterator;
+
+ /// \brief This iterator walks over all of the defs in a given
+ /// MemoryAccess. For MemoryPhi nodes, this walks arguments. For
+ /// MemoryUse/MemoryDef, this walks the defining access.
+ memoryaccess_def_iterator defs_begin();
+ const_memoryaccess_def_iterator defs_begin() const;
+ memoryaccess_def_iterator defs_end();
+ const_memoryaccess_def_iterator defs_end() const;
+
+ /// \brief Get the iterators for the all access list and the defs only list
+ /// We default to the all access list.
+ AllAccessType::self_iterator getIterator() {
+ return this->AllAccessType::getIterator();
+ }
+ AllAccessType::const_self_iterator getIterator() const {
+ return this->AllAccessType::getIterator();
+ }
+ AllAccessType::reverse_self_iterator getReverseIterator() {
+ return this->AllAccessType::getReverseIterator();
+ }
+ AllAccessType::const_reverse_self_iterator getReverseIterator() const {
+ return this->AllAccessType::getReverseIterator();
+ }
+ DefsOnlyType::self_iterator getDefsIterator() {
+ return this->DefsOnlyType::getIterator();
+ }
+ DefsOnlyType::const_self_iterator getDefsIterator() const {
+ return this->DefsOnlyType::getIterator();
+ }
+ DefsOnlyType::reverse_self_iterator getReverseDefsIterator() {
+ return this->DefsOnlyType::getReverseIterator();
+ }
+ DefsOnlyType::const_reverse_self_iterator getReverseDefsIterator() const {
+ return this->DefsOnlyType::getReverseIterator();
+ }
+
+protected:
+ friend class MemorySSA;
+ friend class MemoryUseOrDef;
+ friend class MemoryUse;
+ friend class MemoryDef;
+ friend class MemoryPhi;
+
+ /// \brief Used by MemorySSA to change the block of a MemoryAccess when it is
+ /// moved.
+ void setBlock(BasicBlock *BB) { Block = BB; }
+
+ /// \brief Used for debugging and tracking things about MemoryAccesses.
+ /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
+ virtual unsigned getID() const = 0;
+
+ MemoryAccess(LLVMContext &C, unsigned Vty, BasicBlock *BB,
+ unsigned NumOperands)
+ : User(Type::getVoidTy(C), Vty, nullptr, NumOperands), Block(BB) {}
+
+private:
+ BasicBlock *Block;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) {
+ MA.print(OS);
+ return OS;
+}
+
+/// \brief Class that has the common methods + fields of memory uses/defs. It's
+/// a little awkward to have, but there are many cases where we want either a
+/// use or def, and there are many cases where uses are needed (defs aren't
+/// acceptable), and vice-versa.
+///
+/// This class should never be instantiated directly; make a MemoryUse or
+/// MemoryDef instead.
+class MemoryUseOrDef : public MemoryAccess {
+public:
+ void *operator new(size_t, unsigned) = delete;
+ void *operator new(size_t) = delete;
+
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
+
+ /// \brief Get the instruction that this MemoryUse represents.
+ Instruction *getMemoryInst() const { return MemoryInst; }
+
+ /// \brief Get the access that produces the memory state used by this Use.
+ MemoryAccess *getDefiningAccess() const { return getOperand(0); }
+
+ static inline bool classof(const Value *MA) {
+ return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal;
+ }
+
+ // Sadly, these have to be public because they are needed in some of the
+ // iterators.
+ virtual bool isOptimized() const = 0;
+ virtual MemoryAccess *getOptimized() const = 0;
+ virtual void setOptimized(MemoryAccess *) = 0;
+
+ /// \brief Reset the ID of what this MemoryUse was optimized to, causing it to
+ /// be rewalked by the walker if necessary.
+ /// This really should only be called by tests.
+ virtual void resetOptimized() = 0;
+
+protected:
+ friend class MemorySSA;
+ friend class MemorySSAUpdater;
+ MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty,
+ Instruction *MI, BasicBlock *BB)
+ : MemoryAccess(C, Vty, BB, 1), MemoryInst(MI) {
+ setDefiningAccess(DMA);
+ }
+ void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false) {
+ if (!Optimized) {
+ setOperand(0, DMA);
+ return;
+ }
+ setOptimized(DMA);
+ }
+
+private:
+ Instruction *MemoryInst;
+};
+
+template <>
+struct OperandTraits<MemoryUseOrDef>
+ : public FixedNumOperandTraits<MemoryUseOrDef, 1> {};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess)
+
+/// \brief Represents read-only accesses to memory
+///
+/// In particular, the set of Instructions that will be represented by
+/// MemoryUse's is exactly the set of Instructions for which
+/// AliasAnalysis::getModRefInfo returns "Ref".
+class MemoryUse final : public MemoryUseOrDef {
+public:
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
+
+ MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB)
+ : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB), OptimizedID(0) {}
+
+ // allocate space for exactly one operand
+ void *operator new(size_t s) { return User::operator new(s, 1); }
+ void *operator new(size_t, unsigned) = delete;
+
+ static inline bool classof(const Value *MA) {
+ return MA->getValueID() == MemoryUseVal;
+ }
+
+ void print(raw_ostream &OS) const override;
+
+ virtual void setOptimized(MemoryAccess *DMA) override {
+ OptimizedID = DMA->getID();
+ setOperand(0, DMA);
+ }
+
+ virtual bool isOptimized() const override {
+ return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID();
+ }
+
+ virtual MemoryAccess *getOptimized() const override {
+ return getDefiningAccess();
+ }
+ virtual void resetOptimized() override {
+ OptimizedID = INVALID_MEMORYACCESS_ID;
+ }
+
+protected:
+ friend class MemorySSA;
+
+ unsigned getID() const override {
+ llvm_unreachable("MemoryUses do not have IDs");
+ }
+
+private:
+ unsigned int OptimizedID;
+};
+
+template <>
+struct OperandTraits<MemoryUse> : public FixedNumOperandTraits<MemoryUse, 1> {};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUse, MemoryAccess)
+
+/// \brief Represents a read-write access to memory, whether it is a must-alias,
+/// or a may-alias.
+///
+/// In particular, the set of Instructions that will be represented by
+/// MemoryDef's is exactly the set of Instructions for which
+/// AliasAnalysis::getModRefInfo returns "Mod" or "ModRef".
+/// Note that, in order to provide def-def chains, all defs also have a use
+/// associated with them. This use points to the nearest reaching
+/// MemoryDef/MemoryPhi.
+class MemoryDef final : public MemoryUseOrDef {
+public:
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
+
+ MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB,
+ unsigned Ver)
+ : MemoryUseOrDef(C, DMA, MemoryDefVal, MI, BB), ID(Ver),
+ Optimized(nullptr), OptimizedID(INVALID_MEMORYACCESS_ID) {}
+
+ // allocate space for exactly one operand
+ void *operator new(size_t s) { return User::operator new(s, 1); }
+ void *operator new(size_t, unsigned) = delete;
+
+ static inline bool classof(const Value *MA) {
+ return MA->getValueID() == MemoryDefVal;
+ }
+
+ virtual void setOptimized(MemoryAccess *MA) override {
+ Optimized = MA;
+ OptimizedID = getDefiningAccess()->getID();
+ }
+ virtual MemoryAccess *getOptimized() const override { return Optimized; }
+ virtual bool isOptimized() const override {
+ return getOptimized() && getDefiningAccess() &&
+ OptimizedID == getDefiningAccess()->getID();
+ }
+ virtual void resetOptimized() override {
+ OptimizedID = INVALID_MEMORYACCESS_ID;
+ }
+
+ void print(raw_ostream &OS) const override;
+
+protected:
+ friend class MemorySSA;
+
+ unsigned getID() const override { return ID; }
+
+private:
+ const unsigned ID;
+ MemoryAccess *Optimized;
+ unsigned int OptimizedID;
+};
+
+template <>
+struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 1> {};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryDef, MemoryAccess)
+
+/// \brief Represents phi nodes for memory accesses.
+///
+/// These have the same semantics as regular phi nodes, with the exception that
+/// only one phi will ever exist in a given basic block.
+/// Guaranteeing one phi per block means guaranteeing there is only ever one
+/// valid reaching MemoryDef/MemoryPHI along each path to the phi node.
+/// This is ensured by not allowing disambiguation of the RHS of a MemoryDef or
+/// a MemoryPhi's operands.
+/// That is, given
+/// if (a) {
+/// store %a
+/// store %b
+/// }
+/// it *must* be transformed into
+/// if (a) {
+/// 1 = MemoryDef(liveOnEntry)
+/// store %a
+/// 2 = MemoryDef(1)
+/// store %b
+/// }
+/// and *not*
+/// if (a) {
+/// 1 = MemoryDef(liveOnEntry)
+/// store %a
+/// 2 = MemoryDef(liveOnEntry)
+/// store %b
+/// }
+/// even if the two stores do not conflict. Otherwise, both 1 and 2 reach the
+/// end of the branch, and if there are not two phi nodes, one will be
+/// disconnected completely from the SSA graph below that point.
+/// Because MemoryUse's do not generate new definitions, they do not have this
+/// issue.
+class MemoryPhi final : public MemoryAccess {
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) { return User::operator new(s); }
+
+public:
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess);
+
+ MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0)
+ : MemoryAccess(C, MemoryPhiVal, BB, 0), ID(Ver), ReservedSpace(NumPreds) {
+ allocHungoffUses(ReservedSpace);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
+ // Block iterator interface. This provides access to the list of incoming
+ // basic blocks, which parallels the list of incoming values.
+ typedef BasicBlock **block_iterator;
+ typedef BasicBlock *const *const_block_iterator;
+
+ block_iterator block_begin() {
+ auto *Ref = reinterpret_cast<Use::UserRef *>(op_begin() + ReservedSpace);
+ return reinterpret_cast<block_iterator>(Ref + 1);
+ }
+
+ const_block_iterator block_begin() const {
+ const auto *Ref =
+ reinterpret_cast<const Use::UserRef *>(op_begin() + ReservedSpace);
+ return reinterpret_cast<const_block_iterator>(Ref + 1);
+ }
+
+ block_iterator block_end() { return block_begin() + getNumOperands(); }
+
+ const_block_iterator block_end() const {
+ return block_begin() + getNumOperands();
+ }
+
+ iterator_range<block_iterator> blocks() {
+ return make_range(block_begin(), block_end());
+ }
+
+ iterator_range<const_block_iterator> blocks() const {
+ return make_range(block_begin(), block_end());
+ }
+
+ op_range incoming_values() { return operands(); }
+
+ const_op_range incoming_values() const { return operands(); }
+
+ /// \brief Return the number of incoming edges
+ unsigned getNumIncomingValues() const { return getNumOperands(); }
+
+ /// \brief Return incoming value number x
+ MemoryAccess *getIncomingValue(unsigned I) const { return getOperand(I); }
+ void setIncomingValue(unsigned I, MemoryAccess *V) {
+ assert(V && "PHI node got a null value!");
+ setOperand(I, V);
+ }
+ static unsigned getOperandNumForIncomingValue(unsigned I) { return I; }
+ static unsigned getIncomingValueNumForOperand(unsigned I) { return I; }
+
+ /// \brief Return incoming basic block number @p I.
+ BasicBlock *getIncomingBlock(unsigned I) const { return block_begin()[I]; }
+
+ /// \brief Return incoming basic block corresponding
+ /// to an operand of the PHI.
+ BasicBlock *getIncomingBlock(const Use &U) const {
+ assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
+ return getIncomingBlock(unsigned(&U - op_begin()));
+ }
+
+ /// \brief Return incoming basic block corresponding
+ /// to value use iterator.
+ BasicBlock *getIncomingBlock(MemoryAccess::const_user_iterator I) const {
+ return getIncomingBlock(I.getUse());
+ }
+
+ void setIncomingBlock(unsigned I, BasicBlock *BB) {
+ assert(BB && "PHI node got a null basic block!");
+ block_begin()[I] = BB;
+ }
+
+ /// \brief Add an incoming value to the end of the PHI list
+ void addIncoming(MemoryAccess *V, BasicBlock *BB) {
+ if (getNumOperands() == ReservedSpace)
+ growOperands(); // Get more space!
+ // Initialize some new operands.
+ setNumHungOffUseOperands(getNumOperands() + 1);
+ setIncomingValue(getNumOperands() - 1, V);
+ setIncomingBlock(getNumOperands() - 1, BB);
+ }
+
+ /// \brief Return the first index of the specified basic
+ /// block in the value list for this PHI. Returns -1 if no instance.
+ int getBasicBlockIndex(const BasicBlock *BB) const {
+ for (unsigned I = 0, E = getNumOperands(); I != E; ++I)
+ if (block_begin()[I] == BB)
+ return I;
+ return -1;
+ }
+
+ Value *getIncomingValueForBlock(const BasicBlock *BB) const {
+ int Idx = getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "Invalid basic block argument!");
+ return getIncomingValue(Idx);
+ }
+
+ static inline bool classof(const Value *V) {
+ return V->getValueID() == MemoryPhiVal;
+ }
+
+ void print(raw_ostream &OS) const override;
+
+protected:
+ friend class MemorySSA;
+
+ /// \brief This is more complicated than the generic
+ /// User::allocHungoffUses, because we have to allocate Uses for the incoming
+ /// values and pointers to the incoming blocks, all in one allocation.
+ void allocHungoffUses(unsigned N) {
+ User::allocHungoffUses(N, /* IsPhi */ true);
+ }
+
+ unsigned getID() const final { return ID; }
+
+private:
+ // For debugging only
+ const unsigned ID;
+ unsigned ReservedSpace;
+
+ /// \brief This grows the operand list in response to a push_back style of
+ /// operation, growing the number of operands by 1.5 times.
+ void growOperands() {
+ unsigned E = getNumOperands();
+ // 2 op PHI nodes are VERY common, so reserve at least enough for that.
+ ReservedSpace = std::max(E + E / 2, 2u);
+ growHungoffUses(ReservedSpace, /* IsPhi */ true);
+ }
+};
+
+template <> struct OperandTraits<MemoryPhi> : public HungoffOperandTraits<2> {};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess)
+
+class MemorySSAWalker;
+
+/// \brief Encapsulates MemorySSA, including all data associated with memory
+/// accesses.
+class MemorySSA {
+public:
+ MemorySSA(Function &, AliasAnalysis *, DominatorTree *);
+ ~MemorySSA();
+
+ MemorySSAWalker *getWalker();
+
+ /// \brief Given a memory Mod/Ref'ing instruction, get the MemorySSA
+ /// access associated with it. If passed a basic block, this gets the memory phi
+ /// node that exists for that block, if there is one. Otherwise, this will get
+ /// a MemoryUseOrDef.
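+ ///
+ /// A minimal sketch (assuming `MSSA` is this MemorySSA and `I` is a memory
+ /// instruction it knows about):
+ /// \code
+ ///   if (MemoryUseOrDef *MUD = MSSA.getMemoryAccess(I))
+ ///     dbgs() << *MUD->getDefiningAccess() << "\n";
+ /// \endcode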
+ MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+ MemoryPhi *getMemoryAccess(const BasicBlock *BB) const;
+
+ void dump() const;
+ void print(raw_ostream &) const;
+
+ /// \brief Return true if \p MA represents the live on entry value
+ ///
+ /// Loads and stores from pointer arguments and other global values may be
+ /// defined by memory operations that do not occur in the current function, so
+ /// they may be live on entry to the function. MemorySSA represents such
+ /// memory state by the live on entry definition, which is guaranteed to occur
+ /// before any other memory access in the function.
+ inline bool isLiveOnEntryDef(const MemoryAccess *MA) const {
+ return MA == LiveOnEntryDef.get();
+ }
+
+ inline MemoryAccess *getLiveOnEntryDef() const {
+ return LiveOnEntryDef.get();
+ }
+
+ // Sadly, iplists, by default, own and delete pointers added to the
+ // list. It's not currently possible to have two iplists for the same type,
+ // where one owns the pointers, and one does not. This is because the traits
+ // are per-type, not per-tag. If this ever changes, we should make the
+ // DefList an iplist.
+ using AccessList = iplist<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>;
+ using DefsList =
+ simple_ilist<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>;
+
+ /// \brief Return the list of MemoryAccess's for a given basic block.
+ ///
+ /// This list is not modifiable by the user.
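+ ///
+ /// A minimal iteration sketch (assuming `MSSA` and a block `BB` are in
+ /// scope):
+ /// \code
+ ///   if (const auto *Accesses = MSSA.getBlockAccesses(BB))
+ ///     for (const MemoryAccess &MA : *Accesses)
+ ///       dbgs() << MA << "\n";
+ /// \endcode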
+ const AccessList *getBlockAccesses(const BasicBlock *BB) const {
+ return getWritableBlockAccesses(BB);
+ }
+
+ /// \brief Return the list of MemoryDef's and MemoryPhi's for a given basic
+ /// block.
+ ///
+ /// This list is not modifiable by the user.
+ const DefsList *getBlockDefs(const BasicBlock *BB) const {
+ return getWritableBlockDefs(BB);
+ }
+
+ /// \brief Given two memory accesses in the same basic block, determine
+ /// whether MemoryAccess \p A dominates MemoryAccess \p B.
+ bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
+
+ /// \brief Given two memory accesses in potentially different blocks,
+ /// determine whether MemoryAccess \p A dominates MemoryAccess \p B.
+ bool dominates(const MemoryAccess *A, const MemoryAccess *B) const;
+
+ /// \brief Given a MemoryAccess and a Use, determine whether MemoryAccess \p A
+ /// dominates Use \p B.
+ bool dominates(const MemoryAccess *A, const Use &B) const;
+
+ /// \brief Verify that MemorySSA is self-consistent (i.e., definitions dominate
+ /// all uses, uses appear in the right places). This is used by unit tests.
+ void verifyMemorySSA() const;
+
+ /// Used in various insertion functions to specify whether we are talking
+ /// about the beginning or end of a block.
+ enum InsertionPlace { Beginning, End };
+
+protected:
+ // Used by Memory SSA annotator, dumpers, and wrapper pass
+ friend class MemorySSAAnnotatedWriter;
+ friend class MemorySSAPrinterLegacyPass;
+ friend class MemorySSAUpdater;
+
+ void verifyDefUses(Function &F) const;
+ void verifyDomination(Function &F) const;
+ void verifyOrdering(Function &F) const;
+
+ // This is used by the use optimizer and updater.
+ AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
+ auto It = PerBlockAccesses.find(BB);
+ return It == PerBlockAccesses.end() ? nullptr : It->second.get();
+ }
+
+ // This is used by the use optimizer and updater.
+ DefsList *getWritableBlockDefs(const BasicBlock *BB) const {
+ auto It = PerBlockDefs.find(BB);
+ return It == PerBlockDefs.end() ? nullptr : It->second.get();
+ }
+
+ // These are used by the updater to perform various internal MemorySSA
+ // machinations. They do not always leave the IR in a correct state, and
+ // rely on the updater to fix up what they break, so they are not public.
+
+ void moveTo(MemoryUseOrDef *What, BasicBlock *BB, AccessList::iterator Where);
+ void moveTo(MemoryUseOrDef *What, BasicBlock *BB, InsertionPlace Point);
+ // Rename the dominator tree branch rooted at BB.
+ void renamePass(BasicBlock *BB, MemoryAccess *IncomingVal,
+ SmallPtrSetImpl<BasicBlock *> &Visited) {
+ renamePass(DT->getNode(BB), IncomingVal, Visited, true, true);
+ }
+ void removeFromLookups(MemoryAccess *);
+ void removeFromLists(MemoryAccess *, bool ShouldDelete = true);
+ void insertIntoListsForBlock(MemoryAccess *, const BasicBlock *,
+ InsertionPlace);
+ void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
+ AccessList::iterator);
+ MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
+
+private:
+ class CachingWalker;
+ class OptimizeUses;
+
+ CachingWalker *getWalkerImpl();
+ void buildMemorySSA();
+ void optimizeUses();
+
+ void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
+ using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
+ using DefsMap = DenseMap<const BasicBlock *, std::unique_ptr<DefsList>>;
+
+ void
+ determineInsertionPoint(const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks);
+ void markUnreachableAsLiveOnEntry(BasicBlock *BB);
+ bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
+ MemoryPhi *createMemoryPhi(BasicBlock *BB);
+ MemoryUseOrDef *createNewAccess(Instruction *);
+ MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
+ void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &,
+ const DenseMap<const BasicBlock *, unsigned int> &);
+ MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool);
+ void renameSuccessorPhis(BasicBlock *, MemoryAccess *, bool);
+ void renamePass(DomTreeNode *, MemoryAccess *IncomingVal,
+ SmallPtrSetImpl<BasicBlock *> &Visited,
+ bool SkipVisited = false, bool RenameAllUses = false);
+ AccessList *getOrCreateAccessList(const BasicBlock *);
+ DefsList *getOrCreateDefsList(const BasicBlock *);
+ void renumberBlock(const BasicBlock *) const;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ Function &F;
+
+ // Memory SSA mappings
+ DenseMap<const Value *, MemoryAccess *> ValueToMemoryAccess;
+ // These two mappings contain the main block to access/def mappings for
+ // MemorySSA. The list contained in PerBlockAccesses really owns all the
+ // MemoryAccesses.
+ // Both maps maintain the invariant that if a block is found in them, the
+ // corresponding list is not empty, and if a block is not found in them, the
+ // corresponding list is empty.
+ AccessMap PerBlockAccesses;
+ DefsMap PerBlockDefs;
+ std::unique_ptr<MemoryAccess> LiveOnEntryDef;
+
+ // Domination mappings
+ // Note that the numbering is local to a block, even though the map is
+ // global.
+ mutable SmallPtrSet<const BasicBlock *, 16> BlockNumberingValid;
+ mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
+
+ // Memory SSA building info
+ std::unique_ptr<CachingWalker> Walker;
+ unsigned NextID;
+};
+
+// Internal MemorySSA utils, for use by MemorySSA classes and walkers
+class MemorySSAUtil {
+protected:
+ friend class MemorySSAWalker;
+ friend class GVNHoist;
+ // This function should not be used by new passes.
+ static bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
+ AliasAnalysis &AA);
+};
+
+// This pass does eager building and then printing of MemorySSA. It is used by
+// the tests to build, dump, and verify Memory SSA.
+class MemorySSAPrinterLegacyPass : public FunctionPass {
+public:
+ MemorySSAPrinterLegacyPass();
+
+ bool runOnFunction(Function &) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ static char ID;
+};
+
+/// An analysis that produces \c MemorySSA for a function.
+///
+class MemorySSAAnalysis : public AnalysisInfoMixin<MemorySSAAnalysis> {
+ friend AnalysisInfoMixin<MemorySSAAnalysis>;
+
+ static AnalysisKey Key;
+
+public:
+ // Wrap MemorySSA result to ensure address stability of internal MemorySSA
+ // pointers after construction. Use a wrapper class instead of plain
+ // unique_ptr<MemorySSA> to avoid build breakage on MSVC.
+ struct Result {
+ Result(std::unique_ptr<MemorySSA> &&MSSA) : MSSA(std::move(MSSA)) {}
+ MemorySSA &getMSSA() { return *MSSA.get(); }
+
+ std::unique_ptr<MemorySSA> MSSA;
+ };
+
+ Result run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// \brief Printer pass for \c MemorySSA.
+class MemorySSAPrinterPass : public PassInfoMixin<MemorySSAPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit MemorySSAPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// \brief Verifier pass for \c MemorySSA.
+struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// \brief Legacy analysis pass which computes \c MemorySSA.
+class MemorySSAWrapperPass : public FunctionPass {
+public:
+ MemorySSAWrapperPass();
+
+ static char ID;
+
+ bool runOnFunction(Function &) override;
+ void releaseMemory() override;
+ MemorySSA &getMSSA() { return *MSSA; }
+ const MemorySSA &getMSSA() const { return *MSSA; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void verifyAnalysis() const override;
+ void print(raw_ostream &OS, const Module *M = nullptr) const override;
+
+private:
+ std::unique_ptr<MemorySSA> MSSA;
+};
+
+/// \brief This is the generic walker interface for walkers of MemorySSA.
+/// Walkers are used to further disambiguate the def-use chains
+/// MemorySSA gives you, or otherwise produce better info than MemorySSA gives
+/// you.
+/// In particular, while the def-use chains provide basic information, and are
+/// guaranteed to give, for example, the nearest may-aliasing MemoryDef for a
+/// MemoryUse as AliasAnalysis considers it, a user may want better or other
+/// information. For example, they may want to use SCEV info to further
+/// disambiguate memory accesses, or they may want the nearest dominating
+/// may-aliasing MemoryDef for a call or a store. This API enables a
+/// standardized interface to getting and using that info.
+class MemorySSAWalker {
+public:
+ MemorySSAWalker(MemorySSA *);
+ virtual ~MemorySSAWalker() = default;
+
+ using MemoryAccessSet = SmallVector<MemoryAccess *, 8>;
+
+ /// \brief Given a memory Mod/Ref/ModRef'ing instruction, calling this
+ /// will give you the nearest dominating MemoryAccess that Mod's the location
+ /// the instruction accesses (by skipping any def which AA can prove does not
+ /// alias the location(s) accessed by the instruction given).
+ ///
+ /// Note that this will return a single access, and it must dominate the
+ /// Instruction, so if an operand of a MemoryPhi node Mod's the instruction,
+ /// this will return the MemoryPhi, not the operand. This means that
+ /// given:
+ /// if (a) {
+ /// 1 = MemoryDef(liveOnEntry)
+ /// store %a
+ /// } else {
+ /// 2 = MemoryDef(liveOnEntry)
+ /// store %b
+ /// }
+ /// 3 = MemoryPhi(2, 1)
+ /// MemoryUse(3)
+ /// load %a
+ ///
+ /// calling this API on load(%a) will return the MemoryPhi, not the MemoryDef
+ /// in the if (a) branch.
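+ ///
+ /// A minimal sketch (assuming `Walker` came from MemorySSA::getWalker() and
+ /// `LI` is a load instruction known to MemorySSA):
+ /// \code
+ ///   MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(LI);
+ /// \endcode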
+ MemoryAccess *getClobberingMemoryAccess(const Instruction *I) {
+ MemoryAccess *MA = MSSA->getMemoryAccess(I);
+ assert(MA && "Handed an instruction that MemorySSA doesn't recognize?");
+ return getClobberingMemoryAccess(MA);
+ }
+
+ /// Does the same thing as getClobberingMemoryAccess(const Instruction *I),
+ /// but takes a MemoryAccess instead of an Instruction.
+ virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) = 0;
+
+ /// \brief Given a potentially clobbering memory access and a new location,
+ /// calling this will give you the nearest dominating clobbering MemoryAccess
+ /// (by skipping non-aliasing def links).
+ ///
+ /// This version of the function is mainly used to disambiguate phi-translated
+ /// pointers, where the value of a pointer may have changed from the initial
+ /// memory access. Note that this expects to be handed either a MemoryUse,
+ /// or an already potentially clobbering access. Unlike the above API, if
+ /// given a MemoryDef that clobbers the pointer as the starting access, it
+ /// will return that MemoryDef, whereas the above would return the clobber
+ /// starting from the use side of the memory def.
+ virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
+ const MemoryLocation &) = 0;
+
+ /// \brief Given a memory access, invalidate anything this walker knows about
+ /// that access.
+ /// This API is used by walkers that store information to perform basic cache
+ /// invalidation. This will be called by MemorySSA at appropriate times for
+ /// the walker it uses or returns.
+ virtual void invalidateInfo(MemoryAccess *) {}
+
+ virtual void verify(const MemorySSA *MSSA) { assert(MSSA == this->MSSA); }
+
+protected:
+ friend class MemorySSA; // For updating MSSA pointer in MemorySSA move
+ // constructor.
+ MemorySSA *MSSA;
+};
+
+/// \brief A MemorySSAWalker that does no alias queries, or anything else. It
+/// simply returns the links as they were constructed by the builder.
+class DoNothingMemorySSAWalker final : public MemorySSAWalker {
+public:
+ // Keep the overrides below from hiding the Instruction overload of
+ // getClobberingMemoryAccess.
+ using MemorySSAWalker::getClobberingMemoryAccess;
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
+ const MemoryLocation &) override;
+};
+
+using MemoryAccessPair = std::pair<MemoryAccess *, MemoryLocation>;
+using ConstMemoryAccessPair = std::pair<const MemoryAccess *, MemoryLocation>;
+
+/// \brief Iterator base class used to implement const and non-const iterators
+/// over the defining accesses of a MemoryAccess.
+template <class T>
+class memoryaccess_def_iterator_base
+ : public iterator_facade_base<memoryaccess_def_iterator_base<T>,
+ std::forward_iterator_tag, T, ptrdiff_t, T *,
+ T *> {
+ using BaseT = typename memoryaccess_def_iterator_base::iterator_facade_base;
+
+public:
+ memoryaccess_def_iterator_base(T *Start) : Access(Start) {}
+ memoryaccess_def_iterator_base() = default;
+
+ bool operator==(const memoryaccess_def_iterator_base &Other) const {
+ return Access == Other.Access && (!Access || ArgNo == Other.ArgNo);
+ }
+
+ // This is a bit ugly, but for MemoryPHI's, unlike PHINodes, you can't get the
+ // block from the operand in constant time (in a PHINode, the use list has
+ // both, so it's just subtraction). We provide it as part of the
+ // iterator to avoid callers having to do a linear walk to get the block.
+ // If the operation becomes constant time on MemoryPHI's, this bit of
+ // abstraction breaking should be removed.
+ BasicBlock *getPhiArgBlock() const {
+ MemoryPhi *MP = dyn_cast<MemoryPhi>(Access);
+ assert(MP && "Tried to get phi arg block when not iterating over a PHI");
+ return MP->getIncomingBlock(ArgNo);
+ }
+ typename BaseT::iterator::pointer operator*() const {
+ assert(Access && "Tried to access past the end of our iterator");
+ // Go to the first argument for phis, and the defining access for everything
+ // else.
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
+ return MP->getIncomingValue(ArgNo);
+ return cast<MemoryUseOrDef>(Access)->getDefiningAccess();
+ }
+ using BaseT::operator++;
+ memoryaccess_def_iterator &operator++() {
+ assert(Access && "Hit end of iterator");
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
+ if (++ArgNo >= MP->getNumIncomingValues()) {
+ ArgNo = 0;
+ Access = nullptr;
+ }
+ } else {
+ Access = nullptr;
+ }
+ return *this;
+ }
+
+private:
+ T *Access = nullptr;
+ unsigned ArgNo = 0;
+};
+
+inline memoryaccess_def_iterator MemoryAccess::defs_begin() {
+ return memoryaccess_def_iterator(this);
+}
+
+inline const_memoryaccess_def_iterator MemoryAccess::defs_begin() const {
+ return const_memoryaccess_def_iterator(this);
+}
+
+inline memoryaccess_def_iterator MemoryAccess::defs_end() {
+ return memoryaccess_def_iterator();
+}
+
+inline const_memoryaccess_def_iterator MemoryAccess::defs_end() const {
+ return const_memoryaccess_def_iterator();
+}
+
+/// \brief GraphTraits for a MemoryAccess, which walks defs in the normal case,
+/// and uses in the inverse case.
+template <> struct GraphTraits<MemoryAccess *> {
+ using NodeRef = MemoryAccess *;
+ using ChildIteratorType = memoryaccess_def_iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->defs_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->defs_end(); }
+};
+
+template <> struct GraphTraits<Inverse<MemoryAccess *>> {
+ using NodeRef = MemoryAccess *;
+ using ChildIteratorType = MemoryAccess::iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->user_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->user_end(); }
+};
+
+/// \brief Provide an iterator that walks defs, giving both the memory access,
+/// and the current pointer location, updating the pointer location as it
+/// changes due to phi node translation.
+///
+/// This iterator, while somewhat specialized, is what most clients actually
+/// want when walking upwards through MemorySSA def chains. It takes a pair of
+/// <MemoryAccess,MemoryLocation>, and walks defs, properly translating the
+/// memory location through phi nodes for the user.
+class upward_defs_iterator
+ : public iterator_facade_base<upward_defs_iterator,
+ std::forward_iterator_tag,
+ const MemoryAccessPair> {
+ using BaseT = upward_defs_iterator::iterator_facade_base;
+
+public:
+ upward_defs_iterator(const MemoryAccessPair &Info)
+ : DefIterator(Info.first), Location(Info.second),
+ OriginalAccess(Info.first) {
+ CurrentPair.first = nullptr;
+
+ WalkingPhi = Info.first && isa<MemoryPhi>(Info.first);
+ fillInCurrentPair();
+ }
+
+ upward_defs_iterator() { CurrentPair.first = nullptr; }
+
+ bool operator==(const upward_defs_iterator &Other) const {
+ return DefIterator == Other.DefIterator;
+ }
+
+ BaseT::iterator::reference operator*() const {
+ assert(DefIterator != OriginalAccess->defs_end() &&
+ "Tried to access past the end of our iterator");
+ return CurrentPair;
+ }
+
+ using BaseT::operator++;
+ upward_defs_iterator &operator++() {
+ assert(DefIterator != OriginalAccess->defs_end() &&
+ "Tried to access past the end of the iterator");
+ ++DefIterator;
+ if (DefIterator != OriginalAccess->defs_end())
+ fillInCurrentPair();
+ return *this;
+ }
+
+ BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); }
+
+private:
+ void fillInCurrentPair() {
+ CurrentPair.first = *DefIterator;
+ if (WalkingPhi && Location.Ptr) {
+ PHITransAddr Translator(
+ const_cast<Value *>(Location.Ptr),
+ OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr);
+ if (!Translator.PHITranslateValue(OriginalAccess->getBlock(),
+ DefIterator.getPhiArgBlock(), nullptr,
+ false))
+ if (Translator.getAddr() != Location.Ptr) {
+ CurrentPair.second = Location.getWithNewPtr(Translator.getAddr());
+ return;
+ }
+ }
+ CurrentPair.second = Location;
+ }
+
+ MemoryAccessPair CurrentPair;
+ memoryaccess_def_iterator DefIterator;
+ MemoryLocation Location;
+ MemoryAccess *OriginalAccess = nullptr;
+ bool WalkingPhi = false;
+};
+
+inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair) {
+ return upward_defs_iterator(Pair);
+}
+
+inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); }
+
+inline iterator_range<upward_defs_iterator>
+upward_defs(const MemoryAccessPair &Pair) {
+ return make_range(upward_defs_begin(Pair), upward_defs_end());
+}
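+
+// A minimal sketch (assuming `MSSA` and a load's MemoryUse `MU`): walk upward
+// from its defining access, phi-translating the location as we go.
+//   MemoryLocation Loc = MemoryLocation::get(cast<LoadInst>(MU->getMemoryInst()));
+//   for (const MemoryAccessPair &P : upward_defs({MU->getDefiningAccess(), Loc}))
+//     dbgs() << *P.first << "\n";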
+
+/// Walks the defining accesses of MemoryDefs. Stops after we hit something that
+/// has no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when
+/// comparing against a null def_chain_iterator, this will compare equal only
+/// after walking said Phi/liveOnEntry.
+///
+/// The UseOptimizedChain flag specifies whether to walk the clobbering
+/// access chain, or all the accesses.
+///
+/// Normally, MemoryDefs are all just def/use linked together, so a def_chain on
+/// a MemoryDef will walk all MemoryDefs above it in the program until it hits
+/// a phi node. The optimized chain walks the clobbering access of a store.
+/// So if you are just trying to find, given a store, what the next
+/// thing that would clobber the same memory is, you want the optimized chain.
+template <class T, bool UseOptimizedChain = false>
+struct def_chain_iterator
+ : public iterator_facade_base<def_chain_iterator<T, UseOptimizedChain>,
+ std::forward_iterator_tag, MemoryAccess *> {
+ def_chain_iterator() : MA(nullptr) {}
+ def_chain_iterator(T MA) : MA(MA) {}
+
+ T operator*() const { return MA; }
+
+ def_chain_iterator &operator++() {
+ // N.B. liveOnEntry has a null defining access.
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
+ if (UseOptimizedChain && MUD->isOptimized())
+ MA = MUD->getOptimized();
+ else
+ MA = MUD->getDefiningAccess();
+ } else {
+ MA = nullptr;
+ }
+
+ return *this;
+ }
+
+ bool operator==(const def_chain_iterator &O) const { return MA == O.MA; }
+
+private:
+ T MA;
+};
+
+template <class T>
+inline iterator_range<def_chain_iterator<T>>
+def_chain(T MA, MemoryAccess *UpTo = nullptr) {
+#ifdef EXPENSIVE_CHECKS
+ assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator<T>()) &&
+ "UpTo isn't in the def chain!");
+#endif
+ return make_range(def_chain_iterator<T>(MA), def_chain_iterator<T>(UpTo));
+}
+
+template <class T>
+inline iterator_range<def_chain_iterator<T, true>> optimized_def_chain(T MA) {
+ return make_range(def_chain_iterator<T, true>(MA),
+ def_chain_iterator<T, true>(nullptr));
+}
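+
+// A minimal sketch of walking a def chain (assuming `MA` is the MemoryAccess*
+// of a store's MemoryDef):
+//   for (MemoryAccess *A : def_chain(MA))
+//     dbgs() << *A << "\n"; // ends after the first MemoryPhi or liveOnEntry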
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_MEMORYSSA_H
diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h
new file mode 100644
index 000000000000..d30eeeaa95b6
--- /dev/null
+++ b/include/llvm/Analysis/MemorySSAUpdater.h
@@ -0,0 +1,153 @@
+//===- MemorySSAUpdater.h - Memory SSA Updater-------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// \brief An automatic updater for MemorySSA that handles arbitrary insertion,
+// deletion, and moves. It performs phi insertion where necessary, and
+// automatically updates the MemorySSA IR to be correct.
+// While updating loads or removing instructions is often easy enough to not
+// need this, updating stores should generally not be attempted outside this
+// API.
+//
+// Basic API usage:
+// Create the memory access you want for the instruction (this is mainly so
+// we know where it is, without having to duplicate the entire set of create
+// functions MemorySSA supports).
+// Call insertDef or insertUse depending on whether it's a MemoryUse or a
+// MemoryDef.
+// That's it.
+//
+// For moving, first, move the instruction itself using the normal SSA
+// instruction moving API, then just call moveBefore, moveAfter, or moveTo with
+// the right arguments.
+//
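+// A minimal move sketch (illustrative; assumes `MSSA` is a MemorySSA& and the
+// store instruction itself has already been moved before `OtherStore`):
+//   MemorySSAUpdater Updater(&MSSA);
+//   Updater.moveBefore(MSSA.getMemoryAccess(Store),
+//                      MSSA.getMemoryAccess(OtherStore));
+//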
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H
+#define LLVM_ANALYSIS_MEMORYSSAUPDATER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Analysis/MemorySSA.h"
+
+namespace llvm {
+
+class Function;
+class Instruction;
+class MemoryAccess;
+class LLVMContext;
+class raw_ostream;
+
+class MemorySSAUpdater {
+private:
+ MemorySSA *MSSA;
+ SmallVector<MemoryPhi *, 8> InsertedPHIs;
+ SmallPtrSet<BasicBlock *, 8> VisitedBlocks;
+
+public:
+ MemorySSAUpdater(MemorySSA *MSSA) : MSSA(MSSA) {}
+ /// Insert a definition into the MemorySSA IR. RenameUses will rename any use
+ /// below the new def block (and any inserted phis). RenameUses should be set
+ /// to true if the definition may cause new aliases for loads below it. This
+ /// is not the case for hoisting or sinking or other forms of code *movement*.
+ /// It *is* the case for straight code insertion.
+ /// For example:
+ /// store a
+ /// if (foo) { }
+ /// load a
+ ///
+ /// Moving the store into the if block, and calling insertDef, does not
+ /// require RenameUses.
+ /// However, changing it to:
+ /// store a
+ /// if (foo) { store b }
+ /// load a
+ /// Where a mayalias b, *does* require RenameUses be set to true.
+ void insertDef(MemoryDef *Def, bool RenameUses = false);
+ void insertUse(MemoryUse *Use);
+ void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where);
+ void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where);
+ void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
+ MemorySSA::InsertionPlace Where);
+
+ // The below are utility functions. Other than creation of accesses to pass
+ // to insertDef, and removeMemoryAccess to remove accesses, you should
+ // generally not attempt to update MemorySSA yourself. It is very non-trivial to get
+ // the edge cases right, and the above calls already operate in near-optimal
+ // time bounds.
+
+ /// \brief Create a MemoryAccess in MemorySSA at a specified point in a block,
+ /// with a specified clobbering definition.
+ ///
+ /// Returns the new MemoryAccess.
+ /// This should be called when a memory instruction is created that is being
+ /// used to replace an existing memory instruction. It will *not* create PHI
+ /// nodes, or verify the clobbering definition. The insertion place is used
+ /// solely to determine where in the MemorySSA access lists the instruction
+ /// will be placed. The caller is expected to keep the access ordering
+ /// consistent with the instruction ordering.
+ /// Note: If a MemoryAccess already exists for I, this function will make it
+ /// inaccessible and it *must* have removeMemoryAccess called on it.
+ MemoryAccess *createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition,
+ const BasicBlock *BB,
+ MemorySSA::InsertionPlace Point);
+
+ /// \brief Create a MemoryAccess in MemorySSA before or after an existing
+ /// MemoryAccess.
+ ///
+ /// Returns the new MemoryAccess.
+ /// This should be called when a memory instruction is created that is being
+ /// used to replace an existing memory instruction. It will *not* create PHI
+ /// nodes, or verify the clobbering definition.
+ ///
+ /// Note: If a MemoryAccess already exists for I, this function will make it
+ /// inaccessible and it *must* have removeMemoryAccess called on it.
+ MemoryUseOrDef *createMemoryAccessBefore(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryUseOrDef *InsertPt);
+ MemoryUseOrDef *createMemoryAccessAfter(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryAccess *InsertPt);
+
+ /// \brief Remove a MemoryAccess from MemorySSA, including updating all
+ /// definitions and uses.
+ /// This should be called when a memory instruction that has a MemoryAccess
+ /// associated with it is erased from the program. For example, if a store or
+ /// load is simply erased (not replaced), removeMemoryAccess should be called
+ /// on the MemoryAccess for that store/load.
+ void removeMemoryAccess(MemoryAccess *);
+
+private:
+ // Move What before Where in the MemorySSA IR.
+ template <class WhereType>
+ void moveTo(MemoryUseOrDef *What, BasicBlock *BB, WhereType Where);
+ MemoryAccess *getPreviousDef(MemoryAccess *);
+ MemoryAccess *getPreviousDefInBlock(MemoryAccess *);
+ MemoryAccess *getPreviousDefFromEnd(BasicBlock *);
+ MemoryAccess *getPreviousDefRecursive(BasicBlock *);
+ MemoryAccess *recursePhi(MemoryAccess *Phi);
+ template <class RangeType>
+ MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
+ void fixupDefs(const SmallVectorImpl<MemoryAccess *> &);
+};
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_MEMORYSSAUPDATER_H
diff --git a/include/llvm/Analysis/ObjectUtils.h b/include/llvm/Analysis/ObjectUtils.h
new file mode 100644
index 000000000000..2ad3b1717009
--- /dev/null
+++ b/include/llvm/Analysis/ObjectUtils.h
@@ -0,0 +1,42 @@
+//===- Analysis/ObjectUtils.h - analysis utils for object files -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_OBJECT_UTILS_H
+#define LLVM_ANALYSIS_OBJECT_UTILS_H
+
+#include "llvm/IR/GlobalVariable.h"
+
+namespace llvm {
+
+/// True if GV can be left out of the object symbol table. This is the case
+/// for linkonce_odr values whose address is not significant. While legal, it is
+/// not normally profitable to omit them from the .o symbol table. Using this
+/// analysis makes sense when the information can be passed down to the linker
+/// or we are in LTO.
+inline bool canBeOmittedFromSymbolTable(const GlobalValue *GV) {
+ if (!GV->hasLinkOnceODRLinkage())
+ return false;
+
+ // We assume that anyone who sets global unnamed_addr on a non-constant knows
+ // what they're doing.
+ if (GV->hasGlobalUnnamedAddr())
+ return true;
+
+ // If it is a non-constant variable, it needs to be uniqued across shared
+ // objects.
+ if (auto *Var = dyn_cast<GlobalVariable>(GV))
+ if (!Var->isConstant())
+ return false;
+
+ return GV->hasAtLeastLocalUnnamedAddr();
+}
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_OBJECT_UTILS_H
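As a hypothetical usage sketch (countOmittableSymbols is not part of the patch), a pass with linker feedback or running under LTO might query this per global:

#include "llvm/Analysis/ObjectUtils.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Count the globals the linker could drop from the object symbol table.
unsigned countOmittableSymbols(const Module &M) {
  unsigned N = 0;
  for (const GlobalVariable &GV : M.globals())
    if (canBeOmittedFromSymbolTable(&GV))
      ++N;
  return N;
}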
diff --git a/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/include/llvm/Analysis/OptimizationDiagnosticInfo.h
index 39269269c244..edd9140a3493 100644
--- a/include/llvm/Analysis/OptimizationDiagnosticInfo.h
+++ b/include/llvm/Analysis/OptimizationDiagnosticInfo.h
@@ -38,7 +38,7 @@ class Value;
/// enabled in the LLVM context.
class OptimizationRemarkEmitter {
public:
- OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI)
+ OptimizationRemarkEmitter(const Function *F, BlockFrequencyInfo *BFI)
: F(F), BFI(BFI) {}
/// \brief This variant can be used to generate ORE on demand (without the
@@ -52,7 +52,7 @@ public:
/// operation since BFI and all its required analyses are computed. This is
/// for example useful for CGSCC passes that can't use function analyses
/// passes in the old PM.
- OptimizationRemarkEmitter(Function *F);
+ OptimizationRemarkEmitter(const Function *F);
OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg)
: F(Arg.F), BFI(Arg.BFI) {}
@@ -63,136 +63,16 @@ public:
return *this;
}
- /// The new interface to emit remarks.
- void emit(DiagnosticInfoOptimizationBase &OptDiag);
-
- /// Emit an optimization-applied message.
- ///
- /// \p PassName is the name of the pass emitting the message. If -Rpass= is
- /// given and \p PassName matches the regular expression in -Rpass, then the
- /// remark will be emitted. \p Fn is the function triggering the remark, \p
- /// DLoc is the debug location where the diagnostic is generated. \p V is the
- /// IR Value that identifies the code region. \p Msg is the message string to
- /// use.
- void emitOptimizationRemark(const char *PassName, const DebugLoc &DLoc,
- const Value *V, const Twine &Msg);
-
- /// \brief Same as above but derives the IR Value for the code region and the
- /// debug location from the Loop parameter \p L.
- void emitOptimizationRemark(const char *PassName, Loop *L, const Twine &Msg);
-
- /// \brief Same as above but derives the debug location and the code region
- /// from the debug location and the basic block of \p Inst, respectively.
- void emitOptimizationRemark(const char *PassName, Instruction *Inst,
- const Twine &Msg) {
- emitOptimizationRemark(PassName, Inst->getDebugLoc(), Inst->getParent(),
- Msg);
- }
-
- /// Emit an optimization-missed message.
- ///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-missed= is given and the name matches the regular expression in
- /// -Rpass, then the remark will be emitted. \p DLoc is the debug location
- /// where the diagnostic is generated. \p V is the IR Value that identifies
- /// the code region. \p Msg is the message string to use. If \p IsVerbose is
- /// true, the message is considered verbose and will only be emitted when
- /// verbose output is turned on.
- void emitOptimizationRemarkMissed(const char *PassName, const DebugLoc &DLoc,
- const Value *V, const Twine &Msg,
- bool IsVerbose = false);
-
- /// \brief Same as above but derives the IR Value for the code region and the
- /// debug location from the Loop parameter \p L.
- void emitOptimizationRemarkMissed(const char *PassName, Loop *L,
- const Twine &Msg, bool IsVerbose = false);
-
- /// \brief Same as above but derives the debug location and the code region
- /// from the debug location and the basic block of \p Inst, respectively.
- void emitOptimizationRemarkMissed(const char *PassName, Instruction *Inst,
- const Twine &Msg, bool IsVerbose = false) {
- emitOptimizationRemarkMissed(PassName, Inst->getDebugLoc(),
- Inst->getParent(), Msg, IsVerbose);
- }
-
- /// Emit an optimization analysis remark message.
- ///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-analysis= is given and \p PassName matches the regular expression
- /// in -Rpass, then the remark will be emitted. \p DLoc is the debug location
- /// where the diagnostic is generated. \p V is the IR Value that identifies
- /// the code region. \p Msg is the message string to use. If \p IsVerbose is
- /// true, the message is considered verbose and will only be emitted when
- /// verbose output is turned on.
- void emitOptimizationRemarkAnalysis(const char *PassName,
- const DebugLoc &DLoc, const Value *V,
- const Twine &Msg, bool IsVerbose = false);
-
- /// \brief Same as above but derives the IR Value for the code region and the
- /// debug location from the Loop parameter \p L.
- void emitOptimizationRemarkAnalysis(const char *PassName, Loop *L,
- const Twine &Msg, bool IsVerbose = false);
+ /// Handle invalidation events in the new pass manager.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
- /// \brief Same as above but derives the debug location and the code region
- /// from the debug location and the basic block of \p Inst, respectively.
- void emitOptimizationRemarkAnalysis(const char *PassName, Instruction *Inst,
- const Twine &Msg,
- bool IsVerbose = false) {
- emitOptimizationRemarkAnalysis(PassName, Inst->getDebugLoc(),
- Inst->getParent(), Msg, IsVerbose);
- }
-
- /// \brief This variant allows specifying what should be emitted for missed
- /// and analysis remarks in one call.
- ///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-missed= is given and \p PassName matches the regular expression, \p
- /// MsgForMissedRemark is emitted.
- ///
- /// If -Rpass-analysis= is given and \p PassName matches the regular
- /// expression, \p MsgForAnalysisRemark is emitted.
- ///
- /// The debug location and the code region is derived from \p Inst. If \p
- /// IsVerbose is true, the message is considered verbose and will only be
- /// emitted when verbose output is turned on.
- void emitOptimizationRemarkMissedAndAnalysis(
- const char *PassName, Instruction *Inst, const Twine &MsgForMissedRemark,
- const Twine &MsgForAnalysisRemark, bool IsVerbose = false) {
- emitOptimizationRemarkAnalysis(PassName, Inst, MsgForAnalysisRemark,
- IsVerbose);
- emitOptimizationRemarkMissed(PassName, Inst, MsgForMissedRemark, IsVerbose);
- }
-
- /// \brief Emit an optimization analysis remark related to floating-point
- /// non-commutativity.
+ /// \brief Output the remark via the diagnostic handler and to the
+ /// optimization record file.
///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-analysis= is given and \p PassName matches the regular expression
- /// in -Rpass, then the remark will be emitted. \p Fn is the function
- /// triggering the remark, \p DLoc is the debug location where the diagnostic
- /// is generated.\p V is the IR Value that identifies the code region. \p Msg
- /// is the message string to use.
- void emitOptimizationRemarkAnalysisFPCommute(const char *PassName,
- const DebugLoc &DLoc,
- const Value *V,
- const Twine &Msg);
-
- /// \brief Emit an optimization analysis remark related to pointer aliasing.
- ///
- /// \p PassName is the name of the pass emitting the message. If
- /// -Rpass-analysis= is given and \p PassName matches the regular expression
- /// in -Rpass, then the remark will be emitted. \p Fn is the function
- /// triggering the remark, \p DLoc is the debug location where the diagnostic
- /// is generated.\p V is the IR Value that identifies the code region. \p Msg
- /// is the message string to use.
- void emitOptimizationRemarkAnalysisAliasing(const char *PassName,
- const DebugLoc &DLoc,
- const Value *V, const Twine &Msg);
-
- /// \brief Same as above but derives the IR Value for the code region and the
- /// debug location from the Loop parameter \p L.
- void emitOptimizationRemarkAnalysisAliasing(const char *PassName, Loop *L,
- const Twine &Msg);
+ /// This is the new interface that should now be used rather than the legacy
+ /// emit* APIs.
+ void emit(DiagnosticInfoOptimizationBase &OptDiag);
/// \brief Whether we allow for extra compile-time budget to perform more
/// analysis to produce fewer false positives.
@@ -208,7 +88,7 @@ public:
}
private:
- Function *F;
+ const Function *F;
BlockFrequencyInfo *BFI;
@@ -220,7 +100,7 @@ private:
Optional<uint64_t> computeHotness(const Value *V);
/// Similar but use value from \p OptDiag and update hotness there.
- void computeHotness(DiagnosticInfoOptimizationBase &OptDiag);
+ void computeHotness(DiagnosticInfoIROptimization &OptDiag);
/// \brief Only allow verbose messages if we know we're filtering by hotness
/// (BFI is only set in this case).
@@ -274,5 +154,11 @@ public:
/// \brief Run the analysis pass over a function and produce an
/// OptimizationRemarkEmitter.
Result run(Function &F, FunctionAnalysisManager &AM);
};
+
+namespace yaml {
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
+ static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag);
+};
+}
}
#endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
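For reference, a hedged sketch of the streaming style that replaces the removed emit* helpers; the pass name, remark name, and surrounding plumbing here are hypothetical:

#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
using namespace llvm;

void noteUnrolled(OptimizationRemarkEmitter &ORE, Instruction *Inst,
                  int Count) {
  // One entry point for every remark kind; -Rpass filtering and hotness
  // computation happen inside emit().
  ORE.emit(OptimizationRemark("my-pass", "Unrolled", Inst)
           << "unrolled loop " << ore::NV("UnrollCount", Count) << " times");
}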
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index 34361dac8c16..94ee3b03bb86 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -26,6 +26,10 @@ struct PostDominatorTree : public DominatorTreeBase<BasicBlock> {
typedef DominatorTreeBase<BasicBlock> Base;
PostDominatorTree() : DominatorTreeBase<BasicBlock>(true) {}
+
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
};
/// \brief Analysis pass which computes a \c PostDominatorTree.
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index d7fe76e278e3..1aec35c3e677 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -29,6 +29,7 @@
namespace llvm {
class BasicBlock;
class BlockFrequencyInfo;
+class CallSite;
class ProfileSummary;
/// \brief Analysis providing profile information.
///
@@ -44,7 +45,7 @@ class ProfileSummaryInfo {
private:
Module &M;
std::unique_ptr<ProfileSummary> Summary;
- void computeSummary();
+ bool computeSummary();
void computeThresholds();
// Count thresholds to answer isHotCount and isColdCount queries.
Optional<uint64_t> HotCountThreshold, ColdCountThreshold;
@@ -53,16 +54,29 @@ public:
ProfileSummaryInfo(Module &M) : M(M) {}
ProfileSummaryInfo(ProfileSummaryInfo &&Arg)
: M(Arg.M), Summary(std::move(Arg.Summary)) {}
+ /// Returns the profile count for \p CallInst.
+ static Optional<uint64_t> getProfileCount(const Instruction *CallInst,
+ BlockFrequencyInfo *BFI);
/// \brief Returns true if \p F has hot function entry.
bool isFunctionEntryHot(const Function *F);
+ /// Returns true if \p F has hot function entry or hot call edge.
+ bool isFunctionHotInCallGraph(const Function *F);
/// \brief Returns true if \p F has cold function entry.
bool isFunctionEntryCold(const Function *F);
+ /// Returns true if \p F has cold function entry or cold call edge.
+ bool isFunctionColdInCallGraph(const Function *F);
/// \brief Returns true if count \p C is considered hot.
bool isHotCount(uint64_t C);
/// \brief Returns true if count \p C is considered cold.
bool isColdCount(uint64_t C);
/// \brief Returns true if BasicBlock \p B is considered hot.
bool isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI);
+ /// \brief Returns true if BasicBlock \p B is considered cold.
+ bool isColdBB(const BasicBlock *B, BlockFrequencyInfo *BFI);
+ /// \brief Returns true if CallSite \p CS is considered hot.
+ bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
+ /// \brief Returns true if CallSite \p CS is considered cold.
+ bool isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
};
/// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
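A hypothetical sketch of the new call-graph-aware queries (the helper and its analysis plumbing are not part of the patch):

#include "llvm/Analysis/ProfileSummaryInfo.h"
using namespace llvm;

// Prefer size over speed on provably cold code.
bool preferSizeOverSpeed(ProfileSummaryInfo &PSI, BlockFrequencyInfo *BFI,
                         const Function &F, const BasicBlock *BB) {
  if (PSI.isFunctionColdInCallGraph(&F))
    return true;
  return BB && PSI.isColdBB(BB, BFI);
}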
diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h
index 6e61fc3be384..2fe7c6725266 100644
--- a/include/llvm/Analysis/PtrUseVisitor.h
+++ b/include/llvm/Analysis/PtrUseVisitor.h
@@ -196,7 +196,10 @@ class PtrUseVisitor : protected InstVisitor<DerivedT>,
typedef InstVisitor<DerivedT> Base;
public:
- PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) {}
+ PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) {
+ static_assert(std::is_base_of<PtrUseVisitor, DerivedT>::value,
+ "Must pass the derived type to this template!");
+ }
/// \brief Recursively visit the uses of the given pointer.
/// \returns An info struct about the pointer. See \c PtrInfo for details.
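The static_assert enforces the CRTP contract; a conforming visitor has this shape (a hypothetical sketch, assuming the protected PI and U members of PtrUseVisitorBase):

#include "llvm/Analysis/PtrUseVisitor.h"
using namespace llvm;

// The derived class must name itself as the template argument; naming any
// other type now fails at compile time instead of misbehaving at run time.
struct EscapeChecker : PtrUseVisitor<EscapeChecker> {
  EscapeChecker(const DataLayout &DL) : PtrUseVisitor<EscapeChecker>(DL) {}
  void visitStoreInst(StoreInst &SI) {
    // Storing the pointer itself (rather than through it) escapes it.
    if (SI.getValueOperand() == U->get())
      PI.setEscaped(&SI);
  }
};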
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index f2f27a137a88..caeb21db613e 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -886,6 +886,10 @@ public:
return *this;
}
+ /// Handle invalidation explicitly.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &);
+
// updateStatistics - Update statistic about created regions.
void updateStatistics(Region *R) final;
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 1a93f9aa5fd2..9a50de540f2b 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -543,6 +543,12 @@ private:
/// predicate by splitting it into a set of independent predicates.
bool ProvingSplitPredicate;
+ /// Memoized values for the GetMinTrailingZeros method.
+ DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;
+
+ /// Private helper for the GetMinTrailingZeros method.
+ uint32_t GetMinTrailingZerosImpl(const SCEV *S);
+
/// Information about the number of loop iterations for which a loop exit's
/// branch condition evaluates to the not-taken path. This is a temporary
/// pair of exact and max expressions that are eventually summarized in
@@ -600,14 +606,14 @@ private:
/// Information about the number of times a particular loop exit may be
/// reached before exiting the loop.
struct ExitNotTakenInfo {
- AssertingVH<BasicBlock> ExitingBlock;
+ PoisoningVH<BasicBlock> ExitingBlock;
const SCEV *ExactNotTaken;
std::unique_ptr<SCEVUnionPredicate> Predicate;
bool hasAlwaysTruePredicate() const {
return !Predicate || Predicate->isAlwaysTrue();
}
- explicit ExitNotTakenInfo(AssertingVH<BasicBlock> ExitingBlock,
+ explicit ExitNotTakenInfo(PoisoningVH<BasicBlock> ExitingBlock,
const SCEV *ExactNotTaken,
std::unique_ptr<SCEVUnionPredicate> Predicate)
: ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken),
@@ -972,6 +978,20 @@ private:
/// Test whether the condition described by Pred, LHS, and RHS is true
/// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true. Here LHS is an operation that includes FoundLHS as one of its
+ /// arguments.
+ bool isImpliedViaOperations(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS, const SCEV *FoundRHS,
+ unsigned Depth = 0);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true.
+ /// Uses only simple, non-recursive checks, such as range analysis.
+ bool isKnownViaSimpleReasoning(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
/// true.
bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS, const SCEV *FoundLHS,
@@ -1065,18 +1085,6 @@ private:
bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
ICmpInst::Predicate Pred, bool &Increasing);
- /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
- /// is monotonically increasing or decreasing. In the former case set
- /// `Increasing` to true and in the latter case set `Increasing` to false.
- ///
- /// A predicate is said to be monotonically increasing if may go from being
- /// false to being true as the loop iterates, but never the other way
- /// around. A predicate is said to be monotonically decreasing if may go
- /// from being true to being false as the loop iterates, but never the other
- /// way around.
- bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred,
- bool &Increasing);
-
/// Return SCEV no-wrap flags that can be proven based on reasoning about
/// how poison produced from no-wrap flags on this value (e.g. a nuw add)
/// would trigger undefined behavior on overflow.
@@ -1129,6 +1137,9 @@ public:
/// return true. For pointer types, this is the pointer-sized integer type.
Type *getEffectiveSCEVType(Type *Ty) const;
+ /// Returns the wider of the types \p Ty1 and \p Ty2.
+ Type *getWiderType(Type *Ty1, Type *Ty2) const;
+
/// Return true if the SCEV is a scAddRecExpr or it contains
/// scAddRecExpr. The result will be cached in HasRecMap.
///
@@ -1152,7 +1163,8 @@ public:
const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty);
const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
+ unsigned Depth = 0);
const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
@@ -1301,7 +1313,7 @@ public:
///
/// Implemented in terms of the \c getSmallConstantTripCount overload with
/// the single exiting block passed to it. See that routine for details.
- unsigned getSmallConstantTripCount(Loop *L);
+ unsigned getSmallConstantTripCount(const Loop *L);
/// Returns the maximum trip count of this loop as a normal unsigned
/// value. Returns 0 if the trip count is unknown or not constant. This
@@ -1310,12 +1322,12 @@ public:
/// before taking the branch. For loops with multiple exits, it may not be
/// the number times that the loop header executes if the loop exits
/// prematurely via another branch.
- unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock);
+ unsigned getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock);
/// Returns the upper bound of the loop trip count as a normal unsigned
/// value.
/// Returns 0 if the trip count is unknown or not constant.
- unsigned getSmallConstantMaxTripCount(Loop *L);
+ unsigned getSmallConstantMaxTripCount(const Loop *L);
/// Returns the largest constant divisor of the trip count of the
/// loop if it is a single-exit loop and we can compute a small maximum for
@@ -1323,7 +1335,7 @@ public:
///
/// Implemented in terms of the \c getSmallConstantTripMultiple overload with
/// the single exiting block passed to it. See that routine for details.
- unsigned getSmallConstantTripMultiple(Loop *L);
+ unsigned getSmallConstantTripMultiple(const Loop *L);
/// Returns the largest constant divisor of the trip count of this loop as a
/// normal unsigned value, if possible. This means that the actual trip
@@ -1331,12 +1343,13 @@ public:
/// count could very well be zero as well!). As explained in the comments
/// for getSmallConstantTripCount, this assumes that control exits the loop
/// via ExitingBlock.
- unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock);
+ unsigned getSmallConstantTripMultiple(const Loop *L,
+ BasicBlock *ExitingBlock);
/// Get the expression for the number of loop iterations for which this loop
/// is guaranteed not to exit via ExitingBlock. Otherwise return
/// SCEVCouldNotCompute.
- const SCEV *getExitCount(Loop *L, BasicBlock *ExitingBlock);
+ const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock);
/// If the specified loop has a predictable backedge-taken count, return it,
/// otherwise return a SCEVCouldNotCompute object. The backedge-taken count
@@ -1432,6 +1445,18 @@ public:
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS);
+ /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
+ /// is monotonically increasing or decreasing. In the former case set
+ /// `Increasing` to true and in the latter case set `Increasing` to false.
+ ///
+ /// A predicate is said to be monotonically increasing if it may go from
+ /// being false to being true as the loop iterates, but never the other way
+ /// around. A predicate is said to be monotonically decreasing if it may go
+ /// from being true to being false as the loop iterates, but never the other
+ /// way around.
+ bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred,
+ bool &Increasing);
+
/// Return true if the result of the predicate LHS `Pred` RHS is loop
/// invariant with respect to L. Set InvariantPred, InvariantLHS and
/// InvariantLHS so that InvariantLHS `InvariantPred` InvariantRHS is the
@@ -1613,6 +1638,10 @@ private:
bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
bool NoWrap);
+ /// Get an add expression that has already been created, or create a new one.
+ const SCEV *getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags);
+
private:
FoldingSet<SCEV> UniqueSCEVs;
FoldingSet<SCEVPredicate> UniquePreds;
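With the constified signatures above, the trip-count queries compose naturally; a hypothetical sketch:

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Best known constant bound on a loop's trip count, or 0 if none is known.
unsigned constantTripBound(const Loop *L, ScalarEvolution &SE) {
  if (unsigned Exact = SE.getSmallConstantTripCount(L))
    return Exact; // exact count via the single exiting block
  return SE.getSmallConstantMaxTripCount(L); // constant upper bound, or 0
}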
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index fdcd8be00dde..2c693bceb24d 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -595,58 +595,82 @@ namespace llvm {
const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
- return SE.getTruncateExpr(Operand, Expr->getType());
+ return Operand == Expr->getOperand()
+ ? Expr
+ : SE.getTruncateExpr(Operand, Expr->getType());
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
- return SE.getZeroExtendExpr(Operand, Expr->getType());
+ return Operand == Expr->getOperand()
+ ? Expr
+ : SE.getZeroExtendExpr(Operand, Expr->getType());
}
const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand());
- return SE.getSignExtendExpr(Operand, Expr->getType());
+ return Operand == Expr->getOperand()
+ ? Expr
+ : SE.getSignExtendExpr(Operand, Expr->getType());
}
const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getAddExpr(Operands);
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getAddExpr(Operands);
}
const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getMulExpr(Operands);
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getMulExpr(Operands);
}
const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
- return SE.getUDivExpr(((SC*)this)->visit(Expr->getLHS()),
- ((SC*)this)->visit(Expr->getRHS()));
+ auto *LHS = ((SC *)this)->visit(Expr->getLHS());
+ auto *RHS = ((SC *)this)->visit(Expr->getRHS());
+ bool Changed = LHS != Expr->getLHS() || RHS != Expr->getRHS();
+ return !Changed ? Expr : SE.getUDivExpr(LHS, RHS);
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getAddRecExpr(Operands, Expr->getLoop(),
- Expr->getNoWrapFlags());
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr
+ : SE.getAddRecExpr(Operands, Expr->getLoop(),
+ Expr->getNoWrapFlags());
}
const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getSMaxExpr(Operands);
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC *)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getSMaxExpr(Operands);
}
const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(((SC*)this)->visit(Expr->getOperand(i)));
- return SE.getUMaxExpr(Operands);
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC*)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getUMaxExpr(Operands);
}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
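The point of the Changed tracking above, as a hypothetical sketch: a rewriter that changes nothing now hands back the visited expression itself instead of re-running the get*Expr folding:

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// A do-nothing rewriter: with the identity-preserving visits,
// IdentityRewriter(SE).visit(S) == S holds for every expression S.
struct IdentityRewriter : SCEVRewriteVisitor<IdentityRewriter> {
  IdentityRewriter(ScalarEvolution &SE)
      : SCEVRewriteVisitor<IdentityRewriter>(SE) {}
};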
diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h
index 7c6423a21cfa..b73ad95278a0 100644
--- a/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -37,42 +37,33 @@
#define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
namespace llvm {
-class Instruction;
-class DominatorTree;
class Loop;
class ScalarEvolution;
class SCEV;
-class Value;
-/// TransformKind - Different types of transformations that
-/// TransformForPostIncUse can do.
-enum TransformKind {
- /// Normalize - Normalize according to the given loops.
- Normalize,
- /// NormalizeAutodetect - Detect post-inc opportunities on new expressions,
- /// update the given loop set, and normalize.
- NormalizeAutodetect,
- /// Denormalize - Perform the inverse transform on the expression with the
- /// given loop set.
- Denormalize
-};
-
-/// PostIncLoopSet - A set of loops.
typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet;
-/// TransformForPostIncUse - Transform the given expression according to the
-/// given transformation kind.
-const SCEV *TransformForPostIncUse(TransformKind Kind,
- const SCEV *S,
- Instruction *User,
- Value *OperandValToReplace,
- PostIncLoopSet &Loops,
- ScalarEvolution &SE,
- DominatorTree &DT);
+typedef function_ref<bool(const SCEVAddRecExpr *)> NormalizePredTy;
+
+/// Normalize \p S to be post-increment for all loops present in \p
+/// Loops.
+const SCEV *normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops,
+ ScalarEvolution &SE);
+
+/// Normalize \p S for all add recurrence sub-expressions for which \p
+/// Pred returns true.
+const SCEV *normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred,
+ ScalarEvolution &SE);
-}
+/// Denormalize \p S to be post-increment for all loops present in \p
+/// Loops.
+const SCEV *denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops,
+ ScalarEvolution &SE);
+} // namespace llvm
#endif
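A hypothetical sketch of the slimmed-down interface, including the round-trip check a cautious caller can make before trusting the normalized form:

#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;

// Normalize S to post-increment form for loop L; return nullptr if
// denormalizing does not recover the original expression.
const SCEV *tryNormalize(const SCEV *S, const Loop *L, ScalarEvolution &SE) {
  PostIncLoopSet Loops;
  Loops.insert(L);
  const SCEV *Norm = normalizeForPostIncUse(S, Loops, SE);
  return denormalizeForPostIncUse(Norm, Loops, SE) == S ? Norm : nullptr;
}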
diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def
index 5d5e5b127e63..637fc7ed30dd 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/include/llvm/Analysis/TargetLibraryInfo.def
@@ -20,7 +20,7 @@
// One of TLI_DEFINE_ENUM/STRING are defined.
#if defined(TLI_DEFINE_ENUM)
-#define TLI_DEFINE_ENUM_INTERNAL(enum_variant) enum_variant,
+#define TLI_DEFINE_ENUM_INTERNAL(enum_variant) LibFunc_##enum_variant,
#define TLI_DEFINE_STRING_INTERNAL(string_repr)
#else
#define TLI_DEFINE_ENUM_INTERNAL(enum_variant)
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h
index 8675882431d5..944250cfd6ac 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/include/llvm/Analysis/TargetLibraryInfo.h
@@ -30,14 +30,12 @@ struct VecDesc {
unsigned VectorizationFactor;
};
- namespace LibFunc {
- enum Func {
+ enum LibFunc {
#define TLI_DEFINE_ENUM
#include "llvm/Analysis/TargetLibraryInfo.def"
- NumLibFuncs
- };
- }
+ NumLibFuncs
+ };
/// Implementation of the target library information.
///
@@ -48,9 +46,9 @@ struct VecDesc {
class TargetLibraryInfoImpl {
friend class TargetLibraryInfo;
- unsigned char AvailableArray[(LibFunc::NumLibFuncs+3)/4];
+ unsigned char AvailableArray[(NumLibFuncs+3)/4];
llvm::DenseMap<unsigned, std::string> CustomNames;
- static StringRef const StandardNames[LibFunc::NumLibFuncs];
+ static StringRef const StandardNames[NumLibFuncs];
bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
enum AvailabilityState {
@@ -58,11 +56,11 @@ class TargetLibraryInfoImpl {
CustomName = 1,
Unavailable = 0 // (memset to all zeros)
};
- void setState(LibFunc::Func F, AvailabilityState State) {
+ void setState(LibFunc F, AvailabilityState State) {
AvailableArray[F/4] &= ~(3 << 2*(F&3));
AvailableArray[F/4] |= State << 2*(F&3);
}
- AvailabilityState getState(LibFunc::Func F) const {
+ AvailabilityState getState(LibFunc F) const {
return static_cast<AvailabilityState>((AvailableArray[F/4] >> 2*(F&3)) & 3);
}
@@ -74,7 +72,7 @@ class TargetLibraryInfoImpl {
/// Return true if the function type FTy is valid for the library function
/// F, regardless of whether the function is available.
- bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc::Func F,
+ bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
const DataLayout *DL) const;
public:
@@ -104,28 +102,28 @@ public:
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
- bool getLibFunc(StringRef funcName, LibFunc::Func &F) const;
+ bool getLibFunc(StringRef funcName, LibFunc &F) const;
/// Searches for a particular function name, also checking that its type is
/// valid for the library function matching that name.
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
- bool getLibFunc(const Function &FDecl, LibFunc::Func &F) const;
+ bool getLibFunc(const Function &FDecl, LibFunc &F) const;
/// Forces a function to be marked as unavailable.
- void setUnavailable(LibFunc::Func F) {
+ void setUnavailable(LibFunc F) {
setState(F, Unavailable);
}
/// Forces a function to be marked as available.
- void setAvailable(LibFunc::Func F) {
+ void setAvailable(LibFunc F) {
setState(F, StandardName);
}
/// Forces a function to be marked as available and provide an alternate name
/// that must be used.
- void setAvailableWithName(LibFunc::Func F, StringRef Name) {
+ void setAvailableWithName(LibFunc F, StringRef Name) {
if (StandardNames[F] != Name) {
setState(F, CustomName);
CustomNames[F] = Name;
@@ -225,16 +223,16 @@ public:
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
- bool getLibFunc(StringRef funcName, LibFunc::Func &F) const {
+ bool getLibFunc(StringRef funcName, LibFunc &F) const {
return Impl->getLibFunc(funcName, F);
}
- bool getLibFunc(const Function &FDecl, LibFunc::Func &F) const {
+ bool getLibFunc(const Function &FDecl, LibFunc &F) const {
return Impl->getLibFunc(FDecl, F);
}
/// Tests whether a library function is available.
- bool has(LibFunc::Func F) const {
+ bool has(LibFunc F) const {
return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable;
}
bool isFunctionVectorizable(StringRef F, unsigned VF) const {
@@ -249,37 +247,37 @@ public:
/// Tests if the function is both available and a candidate for optimized code
/// generation.
- bool hasOptimizedCodeGen(LibFunc::Func F) const {
+ bool hasOptimizedCodeGen(LibFunc F) const {
if (Impl->getState(F) == TargetLibraryInfoImpl::Unavailable)
return false;
switch (F) {
default: break;
- case LibFunc::copysign: case LibFunc::copysignf: case LibFunc::copysignl:
- case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl:
- case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl:
- case LibFunc::cos: case LibFunc::cosf: case LibFunc::cosl:
- case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl:
- case LibFunc::sqrt_finite: case LibFunc::sqrtf_finite:
- case LibFunc::sqrtl_finite:
- case LibFunc::fmax: case LibFunc::fmaxf: case LibFunc::fmaxl:
- case LibFunc::fmin: case LibFunc::fminf: case LibFunc::fminl:
- case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl:
- case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl:
- case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill:
- case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl:
- case LibFunc::round: case LibFunc::roundf: case LibFunc::roundl:
- case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl:
- case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l:
- case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l:
- case LibFunc::memcmp: case LibFunc::strcmp: case LibFunc::strcpy:
- case LibFunc::stpcpy: case LibFunc::strlen: case LibFunc::strnlen:
- case LibFunc::memchr: case LibFunc::mempcpy:
+ case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl:
+ case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl:
+ case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl:
+ case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl:
+ case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl:
+ case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl_finite:
+ case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl:
+ case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl:
+ case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl:
+ case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl:
+ case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill:
+ case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl:
+ case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl:
+ case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
+ case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
+ case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
+ case LibFunc_memcmp: case LibFunc_strcmp: case LibFunc_strcpy:
+ case LibFunc_stpcpy: case LibFunc_strlen: case LibFunc_strnlen:
+ case LibFunc_memchr: case LibFunc_mempcpy:
return true;
}
return false;
}
- StringRef getName(LibFunc::Func F) const {
+ StringRef getName(LibFunc F) const {
auto State = Impl->getState(F);
if (State == TargetLibraryInfoImpl::Unavailable)
return StringRef();
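Call sites migrate mechanically from the scoped LibFunc::sqrt spelling to LibFunc_sqrt; a hypothetical sketch:

#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;

// Is this callee the standard sqrt, and does the target provide it?
bool isAvailableSqrt(const Function &Callee, const TargetLibraryInfo &TLI) {
  LibFunc F;
  return TLI.getLibFunc(Callee, F) && TLI.has(F) && F == LibFunc_sqrt;
}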
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 209f05c279d0..67196687d556 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -44,23 +44,26 @@ class Value;
/// \brief Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
- MemIntrinsicInfo()
- : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0),
- NumMemRefs(0), PtrVal(nullptr) {}
- bool ReadMem;
- bool WriteMem;
- /// True only if this memory operation is non-volatile, non-atomic, and
- /// unordered. (See LoadInst/StoreInst for details on each)
- bool IsSimple;
- // Same Id is set by the target for corresponding load/store intrinsics.
- unsigned short MatchingId;
- int NumMemRefs;
-
/// This is the pointer that the intrinsic is loading from or storing to.
/// If this is non-null, then analysis/optimization passes can assume that
/// this intrinsic is functionally equivalent to a load/store from this
/// pointer.
- Value *PtrVal;
+ Value *PtrVal = nullptr;
+
+ // Ordering for atomic operations.
+ AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
+
+ // Same Id is set by the target for corresponding load/store intrinsics.
+ unsigned short MatchingId = 0;
+
+ bool ReadMem = false;
+ bool WriteMem = false;
+ bool IsVolatile = false;
+
+ bool isUnordered() const {
+ return (Ordering == AtomicOrdering::NotAtomic ||
+ Ordering == AtomicOrdering::Unordered) && !IsVolatile;
+ }
};
/// \brief This pass provides access to the codegen interfaces that are needed
@@ -226,6 +229,24 @@ public:
/// starting with the sources of divergence.
bool isSourceOfDivergence(const Value *V) const;
+ /// Returns the address space ID for a target's 'flat' address space. Note
+ /// this is not necessarily the same as addrspace(0), which LLVM sometimes
+ /// refers to as the generic address space. The flat address space is a
+ /// generic address space that can be used to access multiple segments of
+ /// memory with different address spaces. Accessing a memory location through
+ /// a pointer with this address space is expected to be legal but slower than
+ /// accessing the same memory location through a pointer with a different
+ /// address space.
+ ///
+ /// This is for targets with different pointer representations which can
+ /// be converted with the addrspacecast instruction. If a pointer is converted
+ /// to this address space, optimizations should attempt to replace the access
+ /// with the source address space.
+ ///
+ /// \returns ~0u if the target does not have such a flat address space to
+ /// optimize away.
+ unsigned getFlatAddressSpace() const;
+
/// \brief Test whether calls to a function lower to actual program function
/// calls.
///
@@ -411,6 +432,16 @@ public:
/// containing this constant value for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const;
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) const;
+
+ /// If the target has efficient vector element load/store instructions, it can
+ /// return true here so that insertion/extraction costs are not added to
+ /// the scalarization cost of a load/store.
+ bool supportsEfficientVectorElementLoadStore() const;
+
/// \brief Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
@@ -500,6 +531,12 @@ public:
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
+ /// \return True if \p I should be considered for address type promotion.
+ /// \p AllowPromotionWithoutCommonHeader is set to true if promoting \p I is
+ /// profitable without finding other extensions fed by the same input.
+ bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
+
/// \return The size of a cache line in bytes.
unsigned getCacheLineSize() const;
@@ -540,8 +577,10 @@ public:
Type *SubTp = nullptr) const;
/// \return The expected cost of cast instructions, such as bitcast, trunc,
- /// zext, etc.
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
+ /// zext, etc. If there is an existing instruction that holds Opcode, it
+ /// may be passed in the 'I' parameter.
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr) const;
/// \return The expected cost of a sign- or zero-extended vector extract. Use
/// -1 to indicate that there is no information about the index value.
@@ -552,9 +591,11 @@ public:
/// Phi, Ret, Br.
int getCFInstrCost(unsigned Opcode) const;
- /// \returns The expected cost of compare and select instructions.
+ /// \returns The expected cost of compare and select instructions. If there
+ /// is an existing instruction that holds Opcode, it may be passed in the
+ /// 'I' parameter.
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy = nullptr) const;
+ Type *CondTy = nullptr, const Instruction *I = nullptr) const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value.
@@ -562,7 +603,7 @@ public:
/// \return The cost of Load and Store instructions.
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const;
+ unsigned AddressSpace, const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
@@ -605,13 +646,19 @@ public:
/// ((v0+v2), (v1+v3), undef, undef)
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;
- /// \returns The cost of Intrinsic instructions. Types analysis only.
+ /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
+ /// Three cases are handled: (1) a scalar instruction, (2) a vector
+ /// instruction, and (3) a scalar instruction that is to be vectorized
+ /// with vectorization factor VF.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) const;
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1) const;
- /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
+ /// \returns The cost of Intrinsic instructions. Types analysis only.
+ /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
+ /// arguments and the return value will be computed based on types.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) const;
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX) const;
/// \returns The cost of Call instructions.
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
@@ -720,6 +767,7 @@ public:
virtual int getUserCost(const User *U) = 0;
virtual bool hasBranchDivergence() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
+ virtual unsigned getFlatAddressSpace() = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
@@ -743,6 +791,11 @@ public:
virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
+ virtual unsigned
+ getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
+ virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) = 0;
+ virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -763,6 +816,8 @@ public:
Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) = 0;
+ virtual bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
virtual unsigned getCacheLineSize() = 0;
virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;
@@ -776,16 +831,17 @@ public:
ArrayRef<const Value *> Args) = 0;
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;
- virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
+ virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) = 0;
virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) = 0;
virtual int getCFInstrCost(unsigned Opcode) = 0;
virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) = 0;
+ Type *CondTy, const Instruction *I) = 0;
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) = 0;
+ unsigned AddressSpace, const Instruction *I) = 0;
virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) = 0;
@@ -800,11 +856,10 @@ public:
virtual int getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys,
- FastMathFlags FMF) = 0;
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) = 0;
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
virtual int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
@@ -879,6 +934,11 @@ public:
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
}
+
+ unsigned getFlatAddressSpace() override {
+ return Impl.getFlatAddressSpace();
+ }
+
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
@@ -933,6 +993,19 @@ public:
bool shouldBuildLookupTablesForConstant(Constant *C) override {
return Impl.shouldBuildLookupTablesForConstant(C);
}
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert,
+ bool Extract) override {
+ return Impl.getScalarizationOverhead(Ty, Insert, Extract);
+ }
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) override {
+ return Impl.getOperandsScalarizationOverhead(Args, VF);
+ }
+
+ bool supportsEfficientVectorElementLoadStore() override {
+ return Impl.supportsEfficientVectorElementLoadStore();
+ }
+
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
@@ -976,7 +1049,11 @@ public:
unsigned getRegisterBitWidth(bool Vector) override {
return Impl.getRegisterBitWidth(Vector);
}
-
+ bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
+ return Impl.shouldConsiderAddressTypePromotion(
+ I, AllowPromotionWithoutCommonHeader);
+ }
unsigned getCacheLineSize() override {
return Impl.getCacheLineSize();
}
@@ -1003,8 +1080,9 @@ public:
Type *SubTp) override {
return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
}
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
- return Impl.getCastInstrCost(Opcode, Dst, Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) override {
+ return Impl.getCastInstrCost(Opcode, Dst, Src, I);
}
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index) override {
@@ -1013,15 +1091,16 @@ public:
int getCFInstrCost(unsigned Opcode) override {
return Impl.getCFInstrCost(Opcode);
}
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override {
- return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) override {
+ return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) override {
- return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ unsigned AddressSpace, const Instruction *I) override {
+ return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
}
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) override {
@@ -1044,13 +1123,13 @@ public:
return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
- FastMathFlags FMF) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
}
int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) override {
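Cost-model clients can now hand the concrete instruction to the hooks; a hypothetical sketch:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Cost a cast, letting the target see the instruction so it can recognize
// patterns such as an extension folded into the feeding load.
int castCost(const TargetTransformInfo &TTI, const CastInst *CI) {
  return TTI.getCastInstrCost(CI->getOpcode(), CI->getDestTy(),
                              CI->getSrcTy(), CI);
}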
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index cafc40723c9d..9ab6b7445ab8 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -171,6 +171,10 @@ public:
bool isSourceOfDivergence(const Value *V) { return false; }
+ unsigned getFlatAddressSpace() {
+ return -1;
+ }
+
bool isLoweredToCall(const Function *F) {
// FIXME: These should almost certainly not be handled here, and instead
// handled with the help of TLI or the target itself. This was largely
@@ -251,6 +255,15 @@ public:
bool shouldBuildLookupTables() { return true; }
bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+ return 0;
+ }
+
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) { return 0; }
+
+ bool supportsEfficientVectorElementLoadStore() { return false; }
+
bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
bool enableInterleavedAccessVectorization() { return false; }
@@ -292,6 +305,13 @@ public:
unsigned getRegisterBitWidth(bool Vector) { return 32; }
+ bool
+ shouldConsiderAddressTypePromotion(const Instruction &I,
+ bool &AllowPromotionWithoutCommonHeader) {
+ AllowPromotionWithoutCommonHeader = false;
+ return false;
+ }
+
unsigned getCacheLineSize() { return 0; }
unsigned getPrefetchDistance() { return 0; }
@@ -316,7 +336,8 @@ public:
return 1;
}
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { return 1; }
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) { return 1; }
unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) {
@@ -325,7 +346,8 @@ public:
unsigned getCFInstrCost(unsigned Opcode) { return 1; }
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
return 1;
}
@@ -334,7 +356,7 @@ public:
}
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
return 1;
}
@@ -358,11 +380,12 @@ public:
}
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) {
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
return 1;
}
unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) {
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
return 1;
}
diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h
index c3f688f5d7f1..17906ba4e392 100644
--- a/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/include/llvm/Analysis/TypeMetadataUtils.h
@@ -32,14 +32,15 @@ struct DevirtCallSite {
/// call sites based on the call and return them in DevirtCalls.
void findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- SmallVectorImpl<CallInst *> &Assumes, CallInst *CI);
+ SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI);
/// Given a call to the intrinsic @llvm.type.checked.load, find all
/// devirtualizable call sites based on the call and return them in DevirtCalls.
void findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<Instruction *> &LoadedPtrs,
- SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, CallInst *CI);
+ SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
+ const CallInst *CI);
}
#endif
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index aaf6f888e06f..e3c2f3bed227 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -31,6 +31,7 @@ template <typename T> class ArrayRef;
class Instruction;
class Loop;
class LoopInfo;
+ class OptimizationRemarkEmitter;
class MDNode;
class StringRef;
class TargetLibraryInfo;
@@ -52,7 +53,8 @@ template <typename T> class ArrayRef;
const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr,
+ OptimizationRemarkEmitter *ORE = nullptr);
/// Compute known bits from the range metadata.
/// \p KnownZero the set of bits that are known to be zero
/// \p KnownOne the set of bits that are known to be one
@@ -86,8 +88,10 @@ template <typename T> class ArrayRef;
/// Return true if the given value is known to be non-zero when defined. For
/// vectors, return true if every element is known to be non-zero when
- /// defined. Supports values with integer or pointer type and vectors of
- /// integers.
+ /// defined. For pointers, if the context instruction and dominator tree are
+ /// specified, perform context-sensitive analysis and return true if the
+ /// pointer couldn't possibly be null at the specified instruction.
+ /// Supports values with integer or pointer type and vectors of integers.
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
@@ -167,13 +171,25 @@ template <typename T> class ArrayRef;
bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
unsigned Depth = 0);
- /// Return true if we can prove that the specified FP value is either a NaN or
- /// never less than 0.0.
- /// If \p IncludeNeg0 is false, -0.0 is considered less than 0.0.
+ /// Return true if we can prove that the specified FP value is either NaN or
+ /// never less than -0.0.
+ ///
+ /// NaN --> true
+ /// +0 --> true
+ /// -0 --> true
+ /// x > +0 --> true
+ /// x < -0 --> false
+ ///
bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
- /// \returns true if we can prove that the specified FP value has a 0 sign
- /// bit.
+ /// Return true if we can prove that the specified FP value's sign bit is 0.
+ ///
+ /// NaN --> true/false (depending on the NaN's sign bit)
+ /// +0 --> true
+ /// -0 --> false
+ /// x > +0 --> true
+ /// x < -0 --> false
+ ///
bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
/// If the specified value can be set by repeating the same byte in memory,
diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h
index eaa068b89c77..6315e8408f05 100644
--- a/include/llvm/Analysis/VectorUtils.h
+++ b/include/llvm/Analysis/VectorUtils.h
@@ -1,4 +1,4 @@
-//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====//
+//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
-#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
+#ifndef LLVM_ANALYSIS_VECTORUTILS_H
+#define LLVM_ANALYSIS_VECTORUTILS_H
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/IRBuilder.h"
namespace llvm {
@@ -123,6 +124,58 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
+/// \brief Create an interleave shuffle mask.
+///
+/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
+/// vectorization factor \p VF into a single wide vector. The mask is of the
+/// form:
+///
+/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
+///
+/// For example, the mask for VF = 4 and NumVecs = 2 is:
+///
+/// <0, 4, 1, 5, 2, 6, 3, 7>.
+Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
+ unsigned NumVecs);
+
+/// \brief Create a stride shuffle mask.
+///
+/// This function creates a shuffle mask whose elements begin at \p Start and
+/// are incremented by \p Stride. The mask can be used to deinterleave an
+/// interleaved vector into separate vectors of vectorization factor \p VF. The
+/// mask is of the form:
+///
+/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
+///
+/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
+///
+/// <0, 2, 4, 6>
+Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned Stride, unsigned VF);
+
+/// \brief Create a sequential shuffle mask.
+///
+/// This function creates a shuffle mask whose elements are sequential and begin
+/// at \p Start. The mask contains \p NumInts integers and is padded with \p
+/// NumUndefs undef values. The mask is of the form:
+///
+/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
+///
+/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
+///
+/// <0, 1, 2, 3, undef, undef, undef, undef>
+Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned NumInts, unsigned NumUndefs);
+
+/// \brief Concatenate a list of vectors.
+///
+/// This function generates code that concatenates the vectors in \p Vecs into a
+/// single large vector. The number of vectors should be greater than one, and
+/// their element types should be the same. The number of elements in the
+/// vectors should also be the same; however, if the last vector has fewer
+/// elements, it will be padded with undefs.
+Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
+
} // llvm namespace
#endif
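Tying the new helpers together, a hypothetical sketch that builds the masks from the examples above:

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// Masks for interleaving two 4-element vectors and for pulling the even
// lanes back out of the interleaved result.
void buildExampleMasks(IRBuilder<> &Builder) {
  Constant *Interleave = createInterleaveMask(Builder, /*VF=*/4, /*NumVecs=*/2);
  // <0, 4, 1, 5, 2, 6, 3, 7>
  Constant *EvenLanes =
      createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, /*VF=*/4);
  // <0, 2, 4, 6>
  (void)Interleave;
  (void)EvenLanes;
}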