author     Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:01:22 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-04-16 16:01:22 +0000
commit     71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch)
tree       5343938942df402b49ec7300a1c25a2d4ccd5821 /include/llvm/Analysis
parent     31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff)
Diffstat (limited to 'include/llvm/Analysis')
42 files changed, 2408 insertions, 713 deletions
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index d8e50438e722..1b8b9751faa1 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -443,11 +443,7 @@ public: /// getModRefInfo (for fences) - Return information about whether /// a particular store modifies or reads the specified memory location. - ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) { - // Conservatively correct. (We could possibly be a bit smarter if - // Loc is a alloca that doesn't escape.) - return MRI_ModRef; - } + ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc); /// getModRefInfo (for fences) - A convenience wrapper. ModRefInfo getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size) { @@ -528,6 +524,14 @@ public: /// Check whether or not an instruction may read or write the specified /// memory location. /// + /// Note explicitly that getModRefInfo considers the effects of reading and + /// writing the memory location, and not the effect of ordering relative to + /// other instructions. Thus, a volatile load is considered to be Ref, + /// because it does not actually write memory, it just can't be reordered + /// relative to other volatiles (or removed). Atomic ordered loads/stores are + /// considered ModRef ATM because conservatively, the visible effect appears + /// as if memory was written, not just an ordering constraint. + /// /// An instruction that doesn't read or write memory may be trivially LICM'd /// for example. /// diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 5d11b22c6eed..eac97501c759 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -121,7 +121,10 @@ class AliasSet : public ilist_node<AliasSet> { AliasSet *Forward; /// All instructions without a specific address in this alias set. - std::vector<AssertingVH<Instruction> > UnknownInsts; + /// In rare cases this vector can have a null'ed out WeakVH + /// instances (can happen if some other loop pass deletes an + /// instruction in this list). + std::vector<WeakVH> UnknownInsts; /// Number of nodes pointing to this AliasSet plus the number of AliasSets /// forwarding to it. @@ -171,7 +174,7 @@ class AliasSet : public ilist_node<AliasSet> { Instruction *getUnknownInst(unsigned i) const { assert(i < UnknownInsts.size()); - return UnknownInsts[i]; + return cast_or_null<Instruction>(UnknownInsts[i]); } public: diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index 79287ed76f2e..f833f417c7dd 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -31,11 +31,10 @@ namespace llvm { /// \brief A cache of @llvm.assume calls within a function. /// /// This cache provides fast lookup of assumptions within a function by caching -/// them and amortizing the cost of scanning for them across all queries. The -/// cache is also conservatively self-updating so that it will never return -/// incorrect results about a function even as the function is being mutated. -/// However, flushing the cache and rebuilding it (or explicitly updating it) -/// may allow it to discover new assumptions. +/// them and amortizing the cost of scanning for them across all queries. Passes +/// that create new assumptions are required to call registerAssumption() to +/// register any new @llvm.assume calls that they create. 
Deletions of +/// @llvm.assume calls do not require special handling. class AssumptionCache { /// \brief The function for which this cache is handling assumptions. /// @@ -87,6 +86,13 @@ public: /// its instructions. AssumptionCache(Function &F) : F(F), Scanned(false) {} + /// This cache is designed to be self-updating and so it should never be + /// invalidated. + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &) { + return false; + } + /// \brief Add an @llvm.assume intrinsic to this function's cache. /// /// The call passed in must be an instruction within this function and must @@ -196,7 +202,10 @@ public: AssumptionCacheTracker(); ~AssumptionCacheTracker() override; - void releaseMemory() override { AssumptionCaches.shrink_and_clear(); } + void releaseMemory() override { + verifyAnalysis(); + AssumptionCaches.shrink_and_clear(); + } void verifyAnalysis() const override; bool doFinalization(Module &) override { diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h index addfffa01061..14e4bded264a 100644 --- a/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/include/llvm/Analysis/BasicAliasAnalysis.h @@ -233,6 +233,24 @@ FunctionPass *createBasicAAWrapperPass(); /// populated to the best of our ability for a particular function when inside /// of a \c ModulePass or a \c CallGraphSCCPass. BasicAAResult createLegacyPMBasicAAResult(Pass &P, Function &F); + +/// This class is a functor to be used in legacy module or SCC passes for +/// computing AA results for a function. We store the results in fields so that +/// they live long enough to be queried, but we re-use them each time. +class LegacyAARGetter { + Pass &P; + Optional<BasicAAResult> BAR; + Optional<AAResults> AAR; + +public: + LegacyAARGetter(Pass &P) : P(P) {} + AAResults &operator()(Function &F) { + BAR.emplace(createLegacyPMBasicAAResult(P, F)); + AAR.emplace(createLegacyPMAAResults(P, F, *BAR)); + return *AAR; + } +}; + } #endif diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h index 562041d11fa1..cbae01c9102f 100644 --- a/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/include/llvm/Analysis/BlockFrequencyInfo.h @@ -45,6 +45,10 @@ public: ~BlockFrequencyInfo(); + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); + const Function *getFunction() const; const BranchProbabilityInfo *getBPI() const; void view() const; @@ -69,6 +73,12 @@ public: // Set the frequency of the given basic block. void setBlockFreq(const BasicBlock *BB, uint64_t Freq); + /// Set the frequency of \p ReferenceBB to \p Freq and scale the frequencies + /// of the blocks in \p BlocksToScale such that their frequencies relative + /// to \p ReferenceBB remain unchanged. + void setBlockFreqAndScale(const BasicBlock *ReferenceBB, uint64_t Freq, + SmallPtrSetImpl<BasicBlock *> &BlocksToScale); + /// calculate - compute block frequency info for the given function. 
void calculate(const Function &F, const BranchProbabilityInfo &BPI, const LoopInfo &LI); diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 3f4428d18740..e3d81fea49ea 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -1291,11 +1291,14 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits { } std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph, - GVDAGType GType) { + GVDAGType GType, int layout_order = -1) { std::string Result; raw_string_ostream OS(Result); - OS << Node->getName().str() << " : "; + if (layout_order != -1) + OS << Node->getName() << "[" << layout_order << "] : "; + else + OS << Node->getName() << " : "; switch (GType) { case GVDT_Fraction: Graph->printBlockFreq(OS, Node); diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index 14b7a7f529f7..6a876679543d 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -164,6 +164,8 @@ private: /// \brief Track the set of blocks that always lead to a cold call. SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall; + void updatePostDominatedByUnreachable(const BasicBlock *BB); + void updatePostDominatedByColdCall(const BasicBlock *BB); bool calcUnreachableHeuristics(const BasicBlock *BB); bool calcMetadataWeights(const BasicBlock *BB); bool calcColdCallHeuristics(const BasicBlock *BB); diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h index efaa9d6df8ea..5786769cc500 100644 --- a/include/llvm/Analysis/CFGPrinter.h +++ b/include/llvm/Analysis/CFGPrinter.h @@ -140,8 +140,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits { std::string Str; raw_string_ostream OS(Str); - SwitchInst::ConstCaseIt Case = - SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo); + auto Case = *SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo); OS << Case.getCaseValue()->getValue(); return OS.str(); } diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index 6fbe532112b2..398bbfb0c413 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -334,6 +334,7 @@ public: InvalidSCCSet, nullptr, nullptr}; PreservedAnalyses PA = PreservedAnalyses::all(); + CG.buildRefSCCs(); for (auto RCI = CG.postorder_ref_scc_begin(), RCE = CG.postorder_ref_scc_end(); RCI != RCE;) { diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h index 4ecbaa75ac75..ea85436ee580 100644 --- a/include/llvm/Analysis/CallGraph.h +++ b/include/llvm/Analysis/CallGraph.h @@ -272,7 +272,7 @@ public: private: friend class CallGraph; - AssertingVH<Function> F; + Function *F; std::vector<CallRecord> CalledFunctions; diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h index 517842c8b0dc..ff6ca1959153 100644 --- a/include/llvm/Analysis/ConstantFolding.h +++ b/include/llvm/Analysis/ConstantFolding.h @@ -100,6 +100,12 @@ Constant *ConstantFoldExtractValueInstruction(Constant *Agg, /// successful; if not, null is returned. Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx); +/// \brief Attempt to constant fold a shufflevector instruction with the +/// specified operands and indices. The constant result is returned if +/// successful; if not, null is returned. 
+Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, + Constant *Mask); + /// ConstantFoldLoadFromConstPtr - Return the value that a load from C would /// produce if it is constant and determinable. If this is not determinable, /// return null. diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h index b9667f801ed3..8cae63c3c869 100644 --- a/include/llvm/Analysis/DominanceFrontier.h +++ b/include/llvm/Analysis/DominanceFrontier.h @@ -141,6 +141,10 @@ public: typedef DominanceFrontierBase<BasicBlock>::DomSetType DomSetType; typedef DominanceFrontierBase<BasicBlock>::iterator iterator; typedef DominanceFrontierBase<BasicBlock>::const_iterator const_iterator; + + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); }; class DominanceFrontierWrapperPass : public FunctionPass { diff --git a/include/llvm/Analysis/IndirectCallSiteVisitor.h b/include/llvm/Analysis/IndirectCallSiteVisitor.h index 71a8cb886321..3c40cc0235cc 100644 --- a/include/llvm/Analysis/IndirectCallSiteVisitor.h +++ b/include/llvm/Analysis/IndirectCallSiteVisitor.h @@ -21,16 +21,8 @@ struct PGOIndirectCallSiteVisitor PGOIndirectCallSiteVisitor() {} void visitCallSite(CallSite CS) { - if (CS.getCalledFunction() || !CS.getCalledValue()) - return; - Instruction *I = CS.getInstruction(); - if (CallInst *CI = dyn_cast<CallInst>(I)) { - if (CI->isInlineAsm()) - return; - } - if (isa<Constant>(CS.getCalledValue())) - return; - IndirectCallInsts.push_back(I); + if (CS.isIndirectCall()) + IndirectCallInsts.push_back(CS.getInstruction()); } }; diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index 5e7b00261f63..17e5cb6db02d 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -21,6 +21,7 @@ namespace llvm { class AssumptionCacheTracker; +class BlockFrequencyInfo; class CallSite; class DataLayout; class Function; @@ -137,6 +138,9 @@ struct InlineParams { /// Threshold to use when the callsite is considered hot. Optional<int> HotCallSiteThreshold; + + /// Threshold to use when the callsite is considered cold. + Optional<int> ColdCallSiteThreshold; }; /// Generate the parameters to tune the inline cost analysis based only on the @@ -171,6 +175,7 @@ InlineCost getInlineCost(CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI); /// \brief Get an InlineCost with the callee explicitly specified. @@ -182,6 +187,7 @@ InlineCost getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, ProfileSummaryInfo *PSI); /// \brief Minimal filter to detect invalid constructs for inlining. 
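The BasicAliasAnalysis.h hunk above adds the LegacyAARGetter functor for legacy module and SCC passes. As a minimal sketch of how it could be used (the ExampleSCCPass name and body below are hypothetical and not part of this commit; only LegacyAARGetter and the pre-existing legacy-pass APIs come from the headers being patched):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/Function.h"
using namespace llvm;

namespace {
struct ExampleSCCPass : public CallGraphSCCPass {
  static char ID;
  ExampleSCCPass() : CallGraphSCCPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // LegacyAARGetter relies on the legacy AA wrapper passes being available.
    getAAResultsAnalysisUsage(AU);
    AU.setPreservesAll();
  }

  bool runOnSCC(CallGraphSCC &SCC) override {
    // One getter per SCC. Each call to the functor re-populates the cached
    // BasicAAResult/AAResults pair, so hold the returned reference only until
    // the next call.
    LegacyAARGetter AARGetter(*this);
    for (CallGraphNode *N : SCC) {
      Function *F = N->getFunction();
      if (!F || F->isDeclaration())
        continue;
      AAResults &AAR = AARGetter(*F);
      (void)AAR; // issue alias queries against AAR here
    }
    return false;
  }
};
char ExampleSCCPass::ID = 0;
}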
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index 47d6118313cb..b829e995db05 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -42,6 +42,7 @@ namespace llvm { class Instruction; class DataLayout; class FastMathFlags; + class OptimizationRemarkEmitter; class TargetLibraryInfo; class Type; class Value; @@ -246,6 +247,14 @@ namespace llvm { AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr); + /// Given operands for a ShuffleVectorInst, fold the result or return null. + Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const DataLayout &DL, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr); + //=== Helper functions for higher up the class hierarchy. @@ -296,7 +305,8 @@ namespace llvm { Value *SimplifyInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); + AssumptionCache *AC = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. /// diff --git a/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/include/llvm/Analysis/LazyBlockFrequencyInfo.h index 5a02b9dce463..71ce0842f6a9 100644 --- a/include/llvm/Analysis/LazyBlockFrequencyInfo.h +++ b/include/llvm/Analysis/LazyBlockFrequencyInfo.h @@ -9,7 +9,7 @@ // // This is an alternative analysis pass to BlockFrequencyInfoWrapperPass. The // difference is that with this pass the block frequencies are not computed when -// the analysis pass is executed but rather when the BFI results is explicitly +// the analysis pass is executed but rather when the BFI result is explicitly // requested by the analysis client. // //===----------------------------------------------------------------------===// @@ -27,10 +27,58 @@ class BranchProbabilityInfo; class Function; class LoopInfo; +/// Wraps a BFI to allow lazy computation of the block frequencies. +/// +/// A pass that only conditionally uses BFI can uncondtionally require the +/// analysis without paying for the overhead if BFI doesn't end up being used. +template <typename FunctionT, typename BranchProbabilityInfoPassT, + typename LoopInfoT, typename BlockFrequencyInfoT> +class LazyBlockFrequencyInfo { +public: + LazyBlockFrequencyInfo() + : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {} + + /// Set up the per-function input. + void setAnalysis(const FunctionT *F, BranchProbabilityInfoPassT *BPIPass, + const LoopInfoT *LI) { + this->F = F; + this->BPIPass = BPIPass; + this->LI = LI; + } + + /// Retrieve the BFI with the block frequencies computed. 
+ BlockFrequencyInfoT &getCalculated() { + if (!Calculated) { + assert(F && BPIPass && LI && "call setAnalysis"); + BFI.calculate( + *F, BPIPassTrait<BranchProbabilityInfoPassT>::getBPI(BPIPass), *LI); + Calculated = true; + } + return BFI; + } + + const BlockFrequencyInfoT &getCalculated() const { + return const_cast<LazyBlockFrequencyInfo *>(this)->getCalculated(); + } + + void releaseMemory() { + BFI.releaseMemory(); + Calculated = false; + setAnalysis(nullptr, nullptr, nullptr); + } + +private: + BlockFrequencyInfoT BFI; + bool Calculated; + const FunctionT *F; + BranchProbabilityInfoPassT *BPIPass; + const LoopInfoT *LI; +}; + /// \brief This is an alternative analysis pass to /// BlockFrequencyInfoWrapperPass. The difference is that with this pass the /// block frequencies are not computed when the analysis pass is executed but -/// rather when the BFI results is explicitly requested by the analysis client. +/// rather when the BFI result is explicitly requested by the analysis client. /// /// There are some additional requirements for any client pass that wants to use /// the analysis: @@ -49,54 +97,12 @@ class LoopInfo; /// /// Note that it is expected that we wouldn't need this functionality for the /// new PM since with the new PM, analyses are executed on demand. -class LazyBlockFrequencyInfoPass : public FunctionPass { - - /// Wraps a BFI to allow lazy computation of the block frequencies. - /// - /// A pass that only conditionally uses BFI can uncondtionally require the - /// analysis without paying for the overhead if BFI doesn't end up being used. - class LazyBlockFrequencyInfo { - public: - LazyBlockFrequencyInfo() - : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {} - - /// Set up the per-function input. - void setAnalysis(const Function *F, LazyBranchProbabilityInfoPass *BPIPass, - const LoopInfo *LI) { - this->F = F; - this->BPIPass = BPIPass; - this->LI = LI; - } - /// Retrieve the BFI with the block frequencies computed. - BlockFrequencyInfo &getCalculated() { - if (!Calculated) { - assert(F && BPIPass && LI && "call setAnalysis"); - BFI.calculate(*F, BPIPass->getBPI(), *LI); - Calculated = true; - } - return BFI; - } - - const BlockFrequencyInfo &getCalculated() const { - return const_cast<LazyBlockFrequencyInfo *>(this)->getCalculated(); - } - - void releaseMemory() { - BFI.releaseMemory(); - Calculated = false; - setAnalysis(nullptr, nullptr, nullptr); - } - - private: - BlockFrequencyInfo BFI; - bool Calculated; - const Function *F; - LazyBranchProbabilityInfoPass *BPIPass; - const LoopInfo *LI; - }; - - LazyBlockFrequencyInfo LBFI; +class LazyBlockFrequencyInfoPass : public FunctionPass { +private: + LazyBlockFrequencyInfo<Function, LazyBranchProbabilityInfoPass, LoopInfo, + BlockFrequencyInfo> + LBFI; public: static char ID; diff --git a/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/include/llvm/Analysis/LazyBranchProbabilityInfo.h index c76fa1e819ae..067d7ebfd1f5 100644 --- a/include/llvm/Analysis/LazyBranchProbabilityInfo.h +++ b/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -105,5 +105,17 @@ public: /// \brief Helper for client passes to initialize dependent passes for LBPI. void initializeLazyBPIPassPass(PassRegistry &Registry); + +/// \brief Simple trait class that provides a mapping between BPI passes and the +/// corresponding BPInfo. 
+template <typename PassT> struct BPIPassTrait { + static PassT &getBPI(PassT *P) { return *P; } +}; + +template <> struct BPIPassTrait<LazyBranchProbabilityInfoPass> { + static BranchProbabilityInfo &getBPI(LazyBranchProbabilityInfoPass *P) { + return P->getBPI(); + } +}; } #endif diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index bca0aebe2eef..ad7f5c80549f 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -106,6 +106,7 @@ class raw_ostream; class LazyCallGraph { public: class Node; + class EdgeSequence; class SCC; class RefSCC; class edge_iterator; @@ -121,16 +122,6 @@ public: /// inherently reference edges, and so the reference graph forms a superset /// of the formal call graph. /// - /// Furthermore, edges also may point to raw \c Function objects when those - /// functions have not been scanned and incorporated into the graph (yet). - /// This is one of the primary ways in which the graph can be lazy. When - /// functions are scanned and fully incorporated into the graph, all of the - /// edges referencing them are updated to point to the graph \c Node objects - /// instead of to the raw \c Function objects. This class even provides - /// methods to trigger this scan on-demand by attempting to get the target - /// node of the graph and providing a reference back to the graph in order to - /// lazily build it if necessary. - /// /// All of these forms of edges are fundamentally represented as outgoing /// edges. The edges are stored in the source node and point at the target /// node. This allows the edge structure itself to be a very compact data @@ -141,7 +132,6 @@ public: enum Kind : bool { Ref = false, Call = true }; Edge(); - explicit Edge(Function &F, Kind K); explicit Edge(Node &N, Kind K); /// Test whether the edge is null. @@ -158,197 +148,251 @@ public: /// This requires that the edge is not null. bool isCall() const; - /// Get the function referenced by this edge. - /// - /// This requires that the edge is not null, but will succeed whether we - /// have built a graph node for the function yet or not. - Function &getFunction() const; - - /// Get the call graph node referenced by this edge if one exists. + /// Get the call graph node referenced by this edge. /// - /// This requires that the edge is not null. If we have built a graph node - /// for the function this edge points to, this will return that node, - /// otherwise it will return null. - Node *getNode() const; + /// This requires that the edge is not null. + Node &getNode() const; - /// Get the call graph node for this edge, building it if necessary. + /// Get the function referenced by this edge. /// - /// This requires that the edge is not null. If we have not yet built - /// a graph node for the function this edge points to, this will first ask - /// the graph to build that node, inserting it into all the relevant - /// structures. - Node &getNode(LazyCallGraph &G); + /// This requires that the edge is not null. + Function &getFunction() const; private: - friend class LazyCallGraph::Node; + friend class LazyCallGraph::EdgeSequence; friend class LazyCallGraph::RefSCC; - PointerIntPair<PointerUnion<Function *, Node *>, 1, Kind> Value; + PointerIntPair<Node *, 1, Kind> Value; void setKind(Kind K) { Value.setInt(K); } }; - typedef SmallVector<Edge, 4> EdgeVectorT; - typedef SmallVectorImpl<Edge> EdgeVectorImplT; - - /// A node in the call graph. + /// The edge sequence object. /// - /// This represents a single node. 
It's primary roles are to cache the list of - /// callees, de-duplicate and provide fast testing of whether a function is - /// a callee, and facilitate iteration of child nodes in the graph. - class Node { + /// This typically exists entirely within the node but is exposed as + /// a separate type because a node doesn't initially have edges. An explicit + /// population step is required to produce this sequence at first and it is + /// then cached in the node. It is also used to represent edges entering the + /// graph from outside the module to model the graph's roots. + /// + /// The sequence itself both iterable and indexable. The indexes remain + /// stable even as the sequence mutates (including removal). + class EdgeSequence { friend class LazyCallGraph; - friend class LazyCallGraph::SCC; + friend class LazyCallGraph::Node; friend class LazyCallGraph::RefSCC; - LazyCallGraph *G; - Function &F; + typedef SmallVector<Edge, 4> VectorT; + typedef SmallVectorImpl<Edge> VectorImplT; - // We provide for the DFS numbering and Tarjan walk lowlink numbers to be - // stored directly within the node. These are both '-1' when nodes are part - // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk. - int DFSNumber; - int LowLink; + public: + /// An iterator used for the edges to both entry nodes and child nodes. + class iterator + : public iterator_adaptor_base<iterator, VectorImplT::iterator, + std::forward_iterator_tag> { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + VectorImplT::iterator E; + + // Build the iterator for a specific position in the edge list. + iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E) + : iterator_adaptor_base(BaseI), E(E) { + while (I != E && !*I) + ++I; + } - mutable EdgeVectorT Edges; - DenseMap<Function *, int> EdgeIndexMap; + public: + iterator() {} - /// Basic constructor implements the scanning of F into Edges and - /// EdgeIndexMap. - Node(LazyCallGraph &G, Function &F); + using iterator_adaptor_base::operator++; + iterator &operator++() { + do { + ++I; + } while (I != E && !*I); + return *this; + } + }; - /// Internal helper to insert an edge to a function. - void insertEdgeInternal(Function &ChildF, Edge::Kind EK); + /// An iterator over specifically call edges. + /// + /// This has the same iteration properties as the \c iterator, but + /// restricts itself to edges which represent actual calls. + class call_iterator + : public iterator_adaptor_base<call_iterator, VectorImplT::iterator, + std::forward_iterator_tag> { + friend class LazyCallGraph; + friend class LazyCallGraph::Node; + + VectorImplT::iterator E; + + /// Advance the iterator to the next valid, call edge. + void advanceToNextEdge() { + while (I != E && (!*I || !I->isCall())) + ++I; + } - /// Internal helper to insert an edge to a node. - void insertEdgeInternal(Node &ChildN, Edge::Kind EK); + // Build the iterator for a specific position in the edge list. + call_iterator(VectorImplT::iterator BaseI, VectorImplT::iterator E) + : iterator_adaptor_base(BaseI), E(E) { + advanceToNextEdge(); + } - /// Internal helper to change an edge kind. - void setEdgeKind(Function &ChildF, Edge::Kind EK); + public: + call_iterator() {} - /// Internal helper to remove the edge to the given function. 
- void removeEdgeInternal(Function &ChildF); + using iterator_adaptor_base::operator++; + call_iterator &operator++() { + ++I; + advanceToNextEdge(); + return *this; + } + }; - void clear() { - Edges.clear(); - EdgeIndexMap.clear(); - } + iterator begin() { return iterator(Edges.begin(), Edges.end()); } + iterator end() { return iterator(Edges.end(), Edges.end()); } - /// Print the name of this node's function. - friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) { - return OS << N.F.getName(); + Edge &operator[](int i) { return Edges[i]; } + Edge &operator[](Node &N) { + assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!"); + return Edges[EdgeIndexMap.find(&N)->second]; } - - /// Dump the name of this node's function to stderr. - void dump() const; - - public: - LazyCallGraph &getGraph() const { return *G; } - - Function &getFunction() const { return F; } - - edge_iterator begin() const { - return edge_iterator(Edges.begin(), Edges.end()); + Edge *lookup(Node &N) { + auto EI = EdgeIndexMap.find(&N); + return EI != EdgeIndexMap.end() ? &Edges[EI->second] : nullptr; } - edge_iterator end() const { return edge_iterator(Edges.end(), Edges.end()); } - const Edge &operator[](int i) const { return Edges[i]; } - const Edge &operator[](Function &F) const { - assert(EdgeIndexMap.find(&F) != EdgeIndexMap.end() && "No such edge!"); - return Edges[EdgeIndexMap.find(&F)->second]; + call_iterator call_begin() { + return call_iterator(Edges.begin(), Edges.end()); } - const Edge &operator[](Node &N) const { return (*this)[N.getFunction()]; } + call_iterator call_end() { return call_iterator(Edges.end(), Edges.end()); } - const Edge *lookup(Function &F) const { - auto EI = EdgeIndexMap.find(&F); - return EI != EdgeIndexMap.end() ? &Edges[EI->second] : nullptr; + iterator_range<call_iterator> calls() { + return make_range(call_begin(), call_end()); } - call_edge_iterator call_begin() const { - return call_edge_iterator(Edges.begin(), Edges.end()); - } - call_edge_iterator call_end() const { - return call_edge_iterator(Edges.end(), Edges.end()); - } + bool empty() { + for (auto &E : Edges) + if (E) + return false; - iterator_range<call_edge_iterator> calls() const { - return make_range(call_begin(), call_end()); + return true; } - /// Equality is defined as address equality. - bool operator==(const Node &N) const { return this == &N; } - bool operator!=(const Node &N) const { return !operator==(N); } - }; + private: + VectorT Edges; + DenseMap<Node *, int> EdgeIndexMap; - /// A lazy iterator used for both the entry nodes and child nodes. - /// - /// When this iterator is dereferenced, if not yet available, a function will - /// be scanned for "calls" or uses of functions and its child information - /// will be constructed. All of these results are accumulated and cached in - /// the graph. - class edge_iterator - : public iterator_adaptor_base<edge_iterator, EdgeVectorImplT::iterator, - std::forward_iterator_tag> { - friend class LazyCallGraph; - friend class LazyCallGraph::Node; + EdgeSequence() = default; - EdgeVectorImplT::iterator E; + /// Internal helper to insert an edge to a node. + void insertEdgeInternal(Node &ChildN, Edge::Kind EK); - // Build the iterator for a specific position in the edge list. - edge_iterator(EdgeVectorImplT::iterator BaseI, - EdgeVectorImplT::iterator E) - : iterator_adaptor_base(BaseI), E(E) { - while (I != E && !*I) - ++I; - } + /// Internal helper to change an edge kind. 
+ void setEdgeKind(Node &ChildN, Edge::Kind EK); - public: - edge_iterator() {} + /// Internal helper to remove the edge to the given function. + bool removeEdgeInternal(Node &ChildN); - using iterator_adaptor_base::operator++; - edge_iterator &operator++() { - do { - ++I; - } while (I != E && !*I); - return *this; - } + /// Internal helper to replace an edge key with a new one. + /// + /// This should be used when the function for a particular node in the + /// graph gets replaced and we are updating all of the edges to that node + /// to use the new function as the key. + void replaceEdgeKey(Function &OldTarget, Function &NewTarget); }; - /// A lazy iterator over specifically call edges. + /// A node in the call graph. + /// + /// This represents a single node. It's primary roles are to cache the list of + /// callees, de-duplicate and provide fast testing of whether a function is + /// a callee, and facilitate iteration of child nodes in the graph. /// - /// This has the same iteration properties as the \c edge_iterator, but - /// restricts itself to edges which represent actual calls. - class call_edge_iterator - : public iterator_adaptor_base<call_edge_iterator, - EdgeVectorImplT::iterator, - std::forward_iterator_tag> { + /// The node works much like an optional in order to lazily populate the + /// edges of each node. Until populated, there are no edges. Once populated, + /// you can access the edges by dereferencing the node or using the `->` + /// operator as if the node was an `Optional<EdgeSequence>`. + class Node { friend class LazyCallGraph; - friend class LazyCallGraph::Node; + friend class LazyCallGraph::RefSCC; - EdgeVectorImplT::iterator E; + public: + LazyCallGraph &getGraph() const { return *G; } - /// Advance the iterator to the next valid, call edge. - void advanceToNextEdge() { - while (I != E && (!*I || !I->isCall())) - ++I; + Function &getFunction() const { return *F; } + + StringRef getName() const { return F->getName(); } + + /// Equality is defined as address equality. + bool operator==(const Node &N) const { return this == &N; } + bool operator!=(const Node &N) const { return !operator==(N); } + + /// Tests whether the node has been populated with edges. + operator bool() const { return Edges.hasValue(); } + + // We allow accessing the edges by dereferencing or using the arrow + // operator, essentially wrapping the internal optional. + EdgeSequence &operator*() const { + // Rip const off because the node itself isn't changing here. + return const_cast<EdgeSequence &>(*Edges); } + EdgeSequence *operator->() const { return &**this; } - // Build the iterator for a specific position in the edge list. - call_edge_iterator(EdgeVectorImplT::iterator BaseI, - EdgeVectorImplT::iterator E) - : iterator_adaptor_base(BaseI), E(E) { - advanceToNextEdge(); + /// Populate the edges of this node if necessary. + /// + /// The first time this is called it will populate the edges for this node + /// in the graph. It does this by scanning the underlying function, so once + /// this is done, any changes to that function must be explicitly reflected + /// in updates to the graph. + /// + /// \returns the populated \c EdgeSequence to simplify walking it. + /// + /// This will not update or re-scan anything if called repeatedly. Instead, + /// the edge sequence is cached and returned immediately on subsequent + /// calls. 
+ EdgeSequence &populate() { + if (Edges) + return *Edges; + + return populateSlow(); } - public: - call_edge_iterator() {} + private: + LazyCallGraph *G; + Function *F; - using iterator_adaptor_base::operator++; - call_edge_iterator &operator++() { - ++I; - advanceToNextEdge(); - return *this; + // We provide for the DFS numbering and Tarjan walk lowlink numbers to be + // stored directly within the node. These are both '-1' when nodes are part + // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk. + int DFSNumber; + int LowLink; + + Optional<EdgeSequence> Edges; + + /// Basic constructor implements the scanning of F into Edges and + /// EdgeIndexMap. + Node(LazyCallGraph &G, Function &F) + : G(&G), F(&F), DFSNumber(0), LowLink(0) {} + + /// Implementation of the scan when populating. + EdgeSequence &populateSlow(); + + /// Internal helper to directly replace the function with a new one. + /// + /// This is used to facilitate tranfsormations which need to replace the + /// formal Function object but directly move the body and users from one to + /// the other. + void replaceFunction(Function &NewF); + + void clear() { Edges.reset(); } + + /// Print the name of this node's function. + friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) { + return OS << N.F->getName(); } + + /// Dump the name of this node's function to stderr. + void dump() const; }; /// An SCC of the call graph. @@ -789,19 +833,26 @@ public: /// already existing edges. void insertTrivialRefEdge(Node &SourceN, Node &TargetN); + /// Directly replace a node's function with a new function. + /// + /// This should be used when moving the body and users of a function to + /// a new formal function object but not otherwise changing the call graph + /// structure in any way. + /// + /// It requires that the old function in the provided node have zero uses + /// and the new function must have calls and references to it establishing + /// an equivalent graph. + void replaceNodeFunction(Node &N, Function &NewF); + ///@} }; /// A post-order depth-first RefSCC iterator over the call graph. /// - /// This iterator triggers the Tarjan DFS-based formation of the RefSCC (and - /// SCC) DAG for the call graph, walking it lazily in depth-first post-order. - /// That is, it always visits RefSCCs for the target of a reference edge - /// prior to visiting the RefSCC for a source of the edge (when they are in - /// different RefSCCs). - /// - /// When forming each RefSCC, the call edges within it are used to form SCCs - /// within it, so iterating this also controls the lazy formation of SCCs. + /// This iterator walks the cached post-order sequence of RefSCCs. However, + /// it trades stability for flexibility. It is restricted to a forward + /// iterator but will survive mutations which insert new RefSCCs and continue + /// to point to the same RefSCC even if it moves in the post-order sequence. class postorder_ref_scc_iterator : public iterator_facade_base<postorder_ref_scc_iterator, std::forward_iterator_tag, RefSCC> { @@ -825,12 +876,9 @@ public: /// populating it if necessary. static RefSCC *getRC(LazyCallGraph &G, int Index) { if (Index == (int)G.PostOrderRefSCCs.size()) - if (!G.buildNextRefSCCInPostOrder()) - // We're at the end. - return nullptr; + // We're at the end. 
+ return nullptr; - assert(Index < (int)G.PostOrderRefSCCs.size() && - "Built the next post-order RefSCC without growing list!"); return G.PostOrderRefSCCs[Index]; } @@ -859,17 +907,21 @@ public: LazyCallGraph(LazyCallGraph &&G); LazyCallGraph &operator=(LazyCallGraph &&RHS); - edge_iterator begin() { - return edge_iterator(EntryEdges.begin(), EntryEdges.end()); - } - edge_iterator end() { - return edge_iterator(EntryEdges.end(), EntryEdges.end()); - } + EdgeSequence::iterator begin() { return EntryEdges.begin(); } + EdgeSequence::iterator end() { return EntryEdges.end(); } + + void buildRefSCCs(); postorder_ref_scc_iterator postorder_ref_scc_begin() { + if (!EntryEdges.empty()) + assert(!PostOrderRefSCCs.empty() && + "Must form RefSCCs before iterating them!"); return postorder_ref_scc_iterator(*this); } postorder_ref_scc_iterator postorder_ref_scc_end() { + if (!EntryEdges.empty()) + assert(!PostOrderRefSCCs.empty() && + "Must form RefSCCs before iterating them!"); return postorder_ref_scc_iterator(*this, postorder_ref_scc_iterator::IsAtEndT()); } @@ -920,19 +972,19 @@ public: /// below. /// Update the call graph after inserting a new edge. - void insertEdge(Node &Caller, Function &Callee, Edge::Kind EK); + void insertEdge(Node &SourceN, Node &TargetN, Edge::Kind EK); /// Update the call graph after inserting a new edge. - void insertEdge(Function &Caller, Function &Callee, Edge::Kind EK) { - return insertEdge(get(Caller), Callee, EK); + void insertEdge(Function &Source, Function &Target, Edge::Kind EK) { + return insertEdge(get(Source), get(Target), EK); } /// Update the call graph after deleting an edge. - void removeEdge(Node &Caller, Function &Callee); + void removeEdge(Node &SourceN, Node &TargetN); /// Update the call graph after deleting an edge. - void removeEdge(Function &Caller, Function &Callee) { - return removeEdge(get(Caller), Callee); + void removeEdge(Function &Source, Function &Target) { + return removeEdge(get(Source), get(Target)); } ///@} @@ -1013,14 +1065,11 @@ private: /// Maps function->node for fast lookup. DenseMap<const Function *, Node *> NodeMap; - /// The entry nodes to the graph. + /// The entry edges into the graph. /// - /// These nodes are reachable through "external" means. Put another way, they + /// These edges are from "external" sources. Put another way, they /// escape at the module scope. - EdgeVectorT EntryEdges; - - /// Map of the entry nodes in the graph to their indices in \c EntryEdges. - DenseMap<Function *, int> EntryIndexMap; + EdgeSequence EntryEdges; /// Allocator that holds all the call graph SCCs. SpecificBumpPtrAllocator<SCC> SCCBPA; @@ -1045,18 +1094,6 @@ private: /// These are all of the RefSCCs which have no children. SmallVector<RefSCC *, 4> LeafRefSCCs; - /// Stack of nodes in the DFS walk. - SmallVector<std::pair<Node *, edge_iterator>, 4> DFSStack; - - /// Set of entry nodes not-yet-processed into RefSCCs. - SmallVector<Function *, 4> RefSCCEntryNodes; - - /// Stack of nodes the DFS has walked but not yet put into a RefSCC. - SmallVector<Node *, 4> PendingRefSCCStack; - - /// Counter for the next DFS number to assign. - int NextDFSNumber; - /// Helper to insert a new function, with an already looked-up entry in /// the NodeMap. Node &insertInto(Function &F, Node *&MappedN); @@ -1078,6 +1115,23 @@ private: return new (RefSCCBPA.Allocate()) RefSCC(std::forward<Ts>(Args)...); } + /// Common logic for building SCCs from a sequence of roots. 
+ /// + /// This is a very generic implementation of the depth-first walk and SCC + /// formation algorithm. It uses a generic sequence of roots and generic + /// callbacks for each step. This is designed to be used to implement both + /// the RefSCC formation and SCC formation with shared logic. + /// + /// Currently this is a relatively naive implementation of Tarjan's DFS + /// algorithm to form the SCCs. + /// + /// FIXME: We should consider newer variants such as Nuutila. + template <typename RootsT, typename GetBeginT, typename GetEndT, + typename GetNodeT, typename FormSCCCallbackT> + static void buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin, + GetEndT &&GetEnd, GetNodeT &&GetNode, + FormSCCCallbackT &&FormSCC); + /// Build the SCCs for a RefSCC out of a list of nodes. void buildSCCs(RefSCC &RC, node_stack_range Nodes); @@ -1098,22 +1152,12 @@ private: "Index does not point back at RC!"); return IndexIt->second; } - - /// Builds the next node in the post-order RefSCC walk of the call graph and - /// appends it to the \c PostOrderRefSCCs vector. - /// - /// Returns true if a new RefSCC was successfully constructed, and false if - /// there are no more RefSCCs to build in the graph. - bool buildNextRefSCCInPostOrder(); }; inline LazyCallGraph::Edge::Edge() : Value() {} -inline LazyCallGraph::Edge::Edge(Function &F, Kind K) : Value(&F, K) {} inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {} -inline LazyCallGraph::Edge::operator bool() const { - return !Value.getPointer().isNull(); -} +inline LazyCallGraph::Edge::operator bool() const { return Value.getPointer(); } inline LazyCallGraph::Edge::Kind LazyCallGraph::Edge::getKind() const { assert(*this && "Queried a null edge!"); @@ -1125,51 +1169,32 @@ inline bool LazyCallGraph::Edge::isCall() const { return getKind() == Call; } -inline Function &LazyCallGraph::Edge::getFunction() const { +inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode() const { assert(*this && "Queried a null edge!"); - auto P = Value.getPointer(); - if (auto *F = P.dyn_cast<Function *>()) - return *F; - - return P.get<Node *>()->getFunction(); + return *Value.getPointer(); } -inline LazyCallGraph::Node *LazyCallGraph::Edge::getNode() const { - assert(*this && "Queried a null edge!"); - auto P = Value.getPointer(); - if (auto *N = P.dyn_cast<Node *>()) - return N; - - return nullptr; -} - -inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode(LazyCallGraph &G) { +inline Function &LazyCallGraph::Edge::getFunction() const { assert(*this && "Queried a null edge!"); - auto P = Value.getPointer(); - if (auto *N = P.dyn_cast<Node *>()) - return *N; - - Node &N = G.get(*P.get<Function *>()); - Value.setPointer(&N); - return N; + return getNode().getFunction(); } // Provide GraphTraits specializations for call graphs. 
template <> struct GraphTraits<LazyCallGraph::Node *> { typedef LazyCallGraph::Node *NodeRef; - typedef LazyCallGraph::edge_iterator ChildIteratorType; + typedef LazyCallGraph::EdgeSequence::iterator ChildIteratorType; static NodeRef getEntryNode(NodeRef N) { return N; } - static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } - static ChildIteratorType child_end(NodeRef N) { return N->end(); } + static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); } + static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); } }; template <> struct GraphTraits<LazyCallGraph *> { typedef LazyCallGraph::Node *NodeRef; - typedef LazyCallGraph::edge_iterator ChildIteratorType; + typedef LazyCallGraph::EdgeSequence::iterator ChildIteratorType; static NodeRef getEntryNode(NodeRef N) { return N; } - static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } - static ChildIteratorType child_end(NodeRef N) { return N->end(); } + static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); } + static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); } }; /// An analysis pass which computes the call graph for a module. diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h index 610791023a7d..49e088e533dc 100644 --- a/include/llvm/Analysis/LazyValueInfo.h +++ b/include/llvm/Analysis/LazyValueInfo.h @@ -32,6 +32,7 @@ namespace llvm { class LazyValueInfo { friend class LazyValueInfoWrapperPass; AssumptionCache *AC = nullptr; + const DataLayout *DL = nullptr; class TargetLibraryInfo *TLI = nullptr; DominatorTree *DT = nullptr; void *PImpl = nullptr; @@ -40,16 +41,17 @@ class LazyValueInfo { public: ~LazyValueInfo(); LazyValueInfo() {} - LazyValueInfo(AssumptionCache *AC_, TargetLibraryInfo *TLI_, + LazyValueInfo(AssumptionCache *AC_, const DataLayout *DL_, TargetLibraryInfo *TLI_, DominatorTree *DT_) - : AC(AC_), TLI(TLI_), DT(DT_) {} + : AC(AC_), DL(DL_), TLI(TLI_), DT(DT_) {} LazyValueInfo(LazyValueInfo &&Arg) - : AC(Arg.AC), TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) { + : AC(Arg.AC), DL(Arg.DL), TLI(Arg.TLI), DT(Arg.DT), PImpl(Arg.PImpl) { Arg.PImpl = nullptr; } LazyValueInfo &operator=(LazyValueInfo &&Arg) { releaseMemory(); AC = Arg.AC; + DL = Arg.DL; TLI = Arg.TLI; DT = Arg.DT; PImpl = Arg.PImpl; @@ -98,8 +100,15 @@ public: /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); + /// Print the \LazyValueInfoCache. + void printCache(Function &F, raw_ostream &OS); + // For old PM pass. Delete once LazyValueInfoWrapperPass is gone. void releaseMemory(); + + /// Handle invalidation events in the new pass manager. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); }; /// \brief Analysis to compute lazy value information. diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index e167f36219d2..a59c1f88e229 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -85,8 +85,37 @@ Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan = DefMaxInstsToScan, AliasAnalysis *AA = nullptr, - bool *IsLoadCSE = nullptr); + bool *IsLoadCSE = nullptr, + unsigned *NumScanedInst = nullptr); +/// Scan backwards to see if we have the value of the given pointer available +/// locally within a small number of instructions. 
+/// +/// You can use this function to scan across multiple blocks: after you call +/// this function, if ScanFrom points at the beginning of the block, it's safe +/// to continue scanning the predecessors. +/// +/// \param Ptr The pointer we want the load and store to originate from. +/// \param AccessTy The access type of the pointer. +/// \param AtLeastAtomic Are we looking for at-least an atomic load/store ? In +/// case it is false, we can return an atomic or non-atomic load or store. In +/// case it is true, we need to return an atomic load or store. +/// \param ScanBB The basic block to scan. +/// \param [in,out] ScanFrom The location to start scanning from. When this +/// function returns, it points at the last instruction scanned. +/// \param MaxInstsToScan The maximum number of instructions to scan. If this +/// is zero, the whole block will be scanned. +/// \param AA Optional pointer to alias analysis, to make the scan more +/// precise. +/// \param [out] IsLoad Whether the returned value is a load from the same +/// location in memory, as opposed to the value operand of a store. +/// +/// \returns The found value, or nullptr if no value is found. +Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, + BasicBlock *ScanBB, + BasicBlock::iterator &ScanFrom, + unsigned MaxInstsToScan, AliasAnalysis *AA, + bool *IsLoad, unsigned *NumScanedInst); } #endif diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h index 901b193c7e2d..2568903c57f3 100644 --- a/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/include/llvm/Analysis/LoopAccessAnalysis.h @@ -38,39 +38,6 @@ class SCEVUnionPredicate; class LoopAccessInfo; class OptimizationRemarkEmitter; -/// Optimization analysis message produced during vectorization. Messages inform -/// the user why vectorization did not occur. -class LoopAccessReport { - std::string Message; - const Instruction *Instr; - -protected: - LoopAccessReport(const Twine &Message, const Instruction *I) - : Message(Message.str()), Instr(I) {} - -public: - LoopAccessReport(const Instruction *I = nullptr) : Instr(I) {} - - template <typename A> LoopAccessReport &operator<<(const A &Value) { - raw_string_ostream Out(Message); - Out << Value; - return *this; - } - - const Instruction *getInstr() const { return Instr; } - - std::string &str() { return Message; } - const std::string &str() const { return Message; } - operator Twine() { return Message; } - - /// \brief Emit an analysis note for \p PassName with the debug location from - /// the instruction in \p Message if available. Otherwise use the location of - /// \p TheLoop. - static void emitAnalysis(const LoopAccessReport &Message, const Loop *TheLoop, - const char *PassName, - OptimizationRemarkEmitter &ORE); -}; - /// \brief Collection of parameters shared beetween the Loop Vectorizer and the /// Loop Access Analysis. struct VectorizerParams { @@ -126,7 +93,7 @@ struct VectorizerParams { class MemoryDepChecker { public: typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; - typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet; + typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; /// \brief Set of potential dependent memory accesses. typedef EquivalenceClasses<MemAccessInfo> DepCandidates; @@ -221,7 +188,7 @@ public: /// \brief Check whether the dependencies between the accesses are safe. /// /// Only checks sets with elements in \p CheckDeps. 
- bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps, + bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides); /// \brief No memory dependence was encountered that would inhibit diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 20e6af2727fe..996794b660a9 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -26,7 +26,7 @@ // * etc... // // Note that this analysis specifically identifies *Loops* not cycles or SCCs -// in the CFG. There can be strongly connected compontents in the CFG which +// in the CFG. There can be strongly connected components in the CFG which // this analysis will not recognize and that will not be represented by a Loop // instance. In particular, a Loop might be inside such a non-loop SCC, or a // non-loop SCC might contain a sub-SCC which is a Loop. @@ -364,7 +364,7 @@ extern template class LoopBase<BasicBlock, Loop>; /// Represents a single loop in the control flow graph. Note that not all SCCs -/// in the CFG are neccessarily loops. +/// in the CFG are necessarily loops. class Loop : public LoopBase<BasicBlock, Loop> { public: /// \brief A range representing the start and end location of a loop. @@ -469,7 +469,7 @@ public: /// the loop that branches to the loop header. /// /// The LoopID metadata node should have one or more operands and the first - /// operand should should be the node itself. + /// operand should be the node itself. void setLoopID(MDNode *LoopID) const; /// Return true if no exit block for the loop has a predecessor that is @@ -478,7 +478,8 @@ public: /// Return all unique successor blocks of this loop. /// These are the blocks _outside of the current loop_ which are branched to. - /// This assumes that loop exits are in canonical form. + /// This assumes that loop exits are in canonical form, i.e. all exits are + /// dedicated exits. void getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const; /// If getUniqueExitBlocks would return exactly one block, return that block. @@ -570,6 +571,23 @@ public: reverse_iterator rend() const { return TopLevelLoops.rend(); } bool empty() const { return TopLevelLoops.empty(); } + /// Return all of the loops in the function in preorder across the loop + /// nests, with siblings in forward program order. + /// + /// Note that because loops form a forest of trees, preorder is equivalent to + /// reverse postorder. + SmallVector<LoopT *, 4> getLoopsInPreorder(); + + /// Return all of the loops in the function in preorder across the loop + /// nests, with siblings in *reverse* program order. + /// + /// Note that because loops form a forest of trees, preorder is equivalent to + /// reverse postorder. + /// + /// Also note that this is *not* a reverse preorder. Only the siblings are in + /// reverse program order. + SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder(); + /// Return the inner most loop that BB lives in. If a basic block is in no /// loop (for example the entry node), null is returned. LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); } @@ -682,6 +700,10 @@ public: return *this; } + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); + // Most of the public interface is provided via LoopInfoBase. /// Update LoopInfo after removing the last backedge from a loop. 
This updates diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index 833a2202a568..761f8721b54f 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -507,6 +507,55 @@ analyze(const DominatorTreeBase<BlockT> &DomTree) { DFS.traverse(DomRoot->getBlock()); } +template <class BlockT, class LoopT> +SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() { + SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist; + // The outer-most loop actually goes into the result in the same relative + // order as we walk it. But LoopInfo stores the top level loops in reverse + // program order so for here we reverse it to get forward program order. + // FIXME: If we change the order of LoopInfo we will want to remove the + // reverse here. + for (LoopT *RootL : reverse(*this)) { + assert(PreOrderWorklist.empty() && + "Must start with an empty preorder walk worklist."); + PreOrderWorklist.push_back(RootL); + do { + LoopT *L = PreOrderWorklist.pop_back_val(); + // Sub-loops are stored in forward program order, but will process the + // worklist backwards so append them in reverse order. + PreOrderWorklist.append(L->rbegin(), L->rend()); + PreOrderLoops.push_back(L); + } while (!PreOrderWorklist.empty()); + } + + return PreOrderLoops; +} + +template <class BlockT, class LoopT> +SmallVector<LoopT *, 4> +LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() { + SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist; + // The outer-most loop actually goes into the result in the same relative + // order as we walk it. LoopInfo stores the top level loops in reverse + // program order so we walk in order here. + // FIXME: If we change the order of LoopInfo we will want to add a reverse + // here. + for (LoopT *RootL : *this) { + assert(PreOrderWorklist.empty() && + "Must start with an empty preorder walk worklist."); + PreOrderWorklist.push_back(RootL); + do { + LoopT *L = PreOrderWorklist.pop_back_val(); + // Sub-loops are stored in forward program order, but will process the + // worklist backwards so we can just append them in order. 
+ PreOrderWorklist.append(L->begin(), L->end()); + PreOrderLoops.push_back(L); + } while (!PreOrderWorklist.empty()); + } + + return PreOrderLoops; +} + // Debugging template<class BlockT, class LoopT> void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const { @@ -528,15 +577,48 @@ bool compareVectors(std::vector<T> &BB1, std::vector<T> &BB2) { } template <class BlockT, class LoopT> -static void -addInnerLoopsToHeadersMap(DenseMap<BlockT *, const LoopT *> &LoopHeaders, - const LoopInfoBase<BlockT, LoopT> &LI, - const LoopT &L) { +void addInnerLoopsToHeadersMap(DenseMap<BlockT *, const LoopT *> &LoopHeaders, + const LoopInfoBase<BlockT, LoopT> &LI, + const LoopT &L) { LoopHeaders[L.getHeader()] = &L; for (LoopT *SL : L) addInnerLoopsToHeadersMap(LoopHeaders, LI, *SL); } +#ifndef NDEBUG +template <class BlockT, class LoopT> +static void compareLoops(const LoopT *L, const LoopT *OtherL, + DenseMap<BlockT *, const LoopT *> &OtherLoopHeaders) { + BlockT *H = L->getHeader(); + BlockT *OtherH = OtherL->getHeader(); + assert(H == OtherH && + "Mismatched headers even though found in the same map entry!"); + + assert(L->getLoopDepth() == OtherL->getLoopDepth() && + "Mismatched loop depth!"); + const LoopT *ParentL = L, *OtherParentL = OtherL; + do { + assert(ParentL->getHeader() == OtherParentL->getHeader() && + "Mismatched parent loop headers!"); + ParentL = ParentL->getParentLoop(); + OtherParentL = OtherParentL->getParentLoop(); + } while (ParentL); + + for (const LoopT *SubL : *L) { + BlockT *SubH = SubL->getHeader(); + const LoopT *OtherSubL = OtherLoopHeaders.lookup(SubH); + assert(OtherSubL && "Inner loop is missing in computed loop info!"); + OtherLoopHeaders.erase(SubH); + compareLoops(SubL, OtherSubL, OtherLoopHeaders); + } + + std::vector<BlockT *> BBs = L->getBlocks(); + std::vector<BlockT *> OtherBBs = OtherL->getBlocks(); + assert(compareVectors(BBs, OtherBBs) && + "Mismatched basic blocks in the loops!"); +} +#endif + template <class BlockT, class LoopT> void LoopInfoBase<BlockT, LoopT>::verify( const DominatorTreeBase<BlockT> &DomTree) const { @@ -559,42 +641,32 @@ void LoopInfoBase<BlockT, LoopT>::verify( LoopInfoBase<BlockT, LoopT> OtherLI; OtherLI.analyze(DomTree); - DenseMap<BlockT *, const LoopT *> LoopHeaders1; - DenseMap<BlockT *, const LoopT *> LoopHeaders2; - - for (LoopT *L : *this) - addInnerLoopsToHeadersMap(LoopHeaders1, *this, *L); + // Build a map we can use to move from our LI to the computed one. This + // allows us to ignore the particular order in any layer of the loop forest + // while still comparing the structure. 
+ DenseMap<BlockT *, const LoopT *> OtherLoopHeaders; for (LoopT *L : OtherLI) - addInnerLoopsToHeadersMap(LoopHeaders2, OtherLI, *L); - assert(LoopHeaders1.size() == LoopHeaders2.size() && - "LoopInfo is incorrect."); - - auto compareLoops = [&](const LoopT *L1, const LoopT *L2) { - BlockT *H1 = L1->getHeader(); - BlockT *H2 = L2->getHeader(); - if (H1 != H2) - return false; - std::vector<BlockT *> BB1 = L1->getBlocks(); - std::vector<BlockT *> BB2 = L2->getBlocks(); - if (!compareVectors(BB1, BB2)) - return false; - - std::vector<BlockT *> SubLoopHeaders1; - std::vector<BlockT *> SubLoopHeaders2; - for (LoopT *L : *L1) - SubLoopHeaders1.push_back(L->getHeader()); - for (LoopT *L : *L2) - SubLoopHeaders2.push_back(L->getHeader()); - - if (!compareVectors(SubLoopHeaders1, SubLoopHeaders2)) - return false; - return true; - }; - - for (auto &I : LoopHeaders1) { - BlockT *H = I.first; - bool LoopsMatch = compareLoops(LoopHeaders1[H], LoopHeaders2[H]); - assert(LoopsMatch && "LoopInfo is incorrect."); + addInnerLoopsToHeadersMap(OtherLoopHeaders, OtherLI, *L); + + // Walk the top level loops and ensure there is a corresponding top-level + // loop in the computed version and then recursively compare those loop + // nests. + for (LoopT *L : *this) { + BlockT *Header = L->getHeader(); + const LoopT *OtherL = OtherLoopHeaders.lookup(Header); + assert(OtherL && "Top level loop is missing in computed loop info!"); + // Now that we've matched this loop, erase its header from the map. + OtherLoopHeaders.erase(Header); + // And recursively compare these loops. + compareLoops(L, OtherL, OtherLoopHeaders); + } + + // Any remaining entries in the map are loops which were found when computing + // a fresh LoopInfo but not present in the current one. + if (!OtherLoopHeaders.empty()) { + for (const auto &HeaderAndLoop : OtherLoopHeaders) + dbgs() << "Found new loop: " << *HeaderAndLoop.second << "\n"; + llvm_unreachable("Found new loops when recomputing LoopInfo!"); } #endif } diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index b58f07e69475..c5514316f75f 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -32,12 +32,6 @@ class TargetLibraryInfo; class Type; class Value; -enum class ObjSizeMode { - Exact = 0, - Min = 1, - Max = 2 -}; - /// \brief Tests if a value is a call or invoke to a library function that /// allocates or reallocates memory (either malloc, calloc, realloc, or strdup /// like). @@ -129,17 +123,36 @@ static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { // Utility functions to compute size of objects. // +/// Various options to control the behavior of getObjectSize. +struct ObjectSizeOpts { + /// Controls how we handle conditional statements with unknown conditions. + enum class Mode : uint8_t { + /// Fail to evaluate an unknown condition. + Exact, + /// Evaluate all branches of an unknown condition. If all evaluations + /// succeed, pick the minimum size. + Min, + /// Same as Min, except we pick the maximum size of all of the branches. + Max + }; + + /// How we want to evaluate this object's size. + Mode EvalMode = Mode::Exact; + /// Whether to round the result up to the alignment of allocas, byval + /// arguments, and global variables. + bool RoundToAlign = false; + /// If this is true, null pointers in address space 0 will be treated as + /// though they can't be evaluated. Otherwise, null is always considered to + /// point to a 0 byte region of memory. 
+ bool NullIsUnknownSize = false; +}; + /// \brief Compute the size of the object pointed by Ptr. Returns true and the /// object size in Size if successful, and false otherwise. In this context, by /// object we mean the region of memory starting at Ptr to the end of the /// underlying object pointed to by Ptr. -/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, -/// byval arguments, and global variables. -/// If Mode is Min or Max the size will be evaluated even if it depends on -/// a condition and corresponding value will be returned (min or max). bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL, - const TargetLibraryInfo *TLI, bool RoundToAlign = false, - ObjSizeMode Mode = ObjSizeMode::Exact); + const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {}); /// Try to turn a call to @llvm.objectsize into an integer value of the given /// Type. Returns null on failure. @@ -160,8 +173,7 @@ class ObjectSizeOffsetVisitor const DataLayout &DL; const TargetLibraryInfo *TLI; - bool RoundToAlign; - ObjSizeMode Mode; + ObjectSizeOpts Options; unsigned IntTyBits; APInt Zero; SmallPtrSet<Instruction *, 8> SeenInsts; @@ -174,8 +186,7 @@ class ObjectSizeOffsetVisitor public: ObjectSizeOffsetVisitor(const DataLayout &DL, const TargetLibraryInfo *TLI, - LLVMContext &Context, bool RoundToAlign = false, - ObjSizeMode Mode = ObjSizeMode::Exact); + LLVMContext &Context, ObjectSizeOpts Options = {}); SizeOffsetType compute(Value *V); diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h new file mode 100644 index 000000000000..db31ae9f4f10 --- /dev/null +++ b/include/llvm/Analysis/MemorySSA.h @@ -0,0 +1,1155 @@ +//===- MemorySSA.h - Build Memory SSA ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file exposes an interface to building/using memory SSA to +/// walk memory instructions using a use/def graph. +/// +/// Memory SSA class builds an SSA form that links together memory access +/// instructions such as loads, stores, atomics, and calls. Additionally, it +/// does a trivial form of "heap versioning" Every time the memory state changes +/// in the program, we generate a new heap version. It generates +/// MemoryDef/Uses/Phis that are overlayed on top of the existing instructions. 
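Returning briefly to the MemoryBuiltins change above, a minimal sketch of how the removed RoundToAlign/ObjSizeMode parameters map onto the new ObjectSizeOpts struct (the wrapper below is invented, not part of this change).

#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

// Returns true and sets Size to an upper bound on the object Ptr points to,
// treating a null pointer in address space 0 as unevaluable.
static bool getMaxObjectSize(const Value *Ptr, const DataLayout &DL,
                             const TargetLibraryInfo *TLI, uint64_t &Size) {
  ObjectSizeOpts Opts;
  Opts.EvalMode = ObjectSizeOpts::Mode::Max; // take the largest branch
  Opts.RoundToAlign = false;                 // do not round up to alignment
  Opts.NullIsUnknownSize = true;             // null is not a 0-byte object
  return getObjectSize(Ptr, Size, DL, TLI, Opts);
}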
+/// +/// As a trivial example, +/// define i32 @main() #0 { +/// entry: +/// %call = call noalias i8* @_Znwm(i64 4) #2 +/// %0 = bitcast i8* %call to i32* +/// %call1 = call noalias i8* @_Znwm(i64 4) #2 +/// %1 = bitcast i8* %call1 to i32* +/// store i32 5, i32* %0, align 4 +/// store i32 7, i32* %1, align 4 +/// %2 = load i32* %0, align 4 +/// %3 = load i32* %1, align 4 +/// %add = add nsw i32 %2, %3 +/// ret i32 %add +/// } +/// +/// Will become +/// define i32 @main() #0 { +/// entry: +/// ; 1 = MemoryDef(0) +/// %call = call noalias i8* @_Znwm(i64 4) #3 +/// %2 = bitcast i8* %call to i32* +/// ; 2 = MemoryDef(1) +/// %call1 = call noalias i8* @_Znwm(i64 4) #3 +/// %4 = bitcast i8* %call1 to i32* +/// ; 3 = MemoryDef(2) +/// store i32 5, i32* %2, align 4 +/// ; 4 = MemoryDef(3) +/// store i32 7, i32* %4, align 4 +/// ; MemoryUse(3) +/// %7 = load i32* %2, align 4 +/// ; MemoryUse(4) +/// %8 = load i32* %4, align 4 +/// %add = add nsw i32 %7, %8 +/// ret i32 %add +/// } +/// +/// Given this form, all the stores that could ever effect the load at %8 can be +/// gotten by using the MemoryUse associated with it, and walking from use to +/// def until you hit the top of the function. +/// +/// Each def also has a list of users associated with it, so you can walk from +/// both def to users, and users to defs. Note that we disambiguate MemoryUses, +/// but not the RHS of MemoryDefs. You can see this above at %7, which would +/// otherwise be a MemoryUse(4). Being disambiguated means that for a given +/// store, all the MemoryUses on its use lists are may-aliases of that store +/// (but the MemoryDefs on its use list may not be). +/// +/// MemoryDefs are not disambiguated because it would require multiple reaching +/// definitions, which would require multiple phis, and multiple memoryaccesses +/// per instruction. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYSSA_H +#define LLVM_ANALYSIS_MEMORYSSA_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <iterator> +#include <memory> +#include <utility> + +namespace llvm { + +class Function; +class Instruction; +class MemoryAccess; +class LLVMContext; +class raw_ostream; +namespace MSSAHelpers { +struct AllAccessTag {}; +struct DefsOnlyTag {}; +} + +enum { + // Used to signify what the default invalid ID is for MemoryAccess's + // getID() + INVALID_MEMORYACCESS_ID = 0 +}; + +template <class T> class memoryaccess_def_iterator_base; +using memoryaccess_def_iterator = memoryaccess_def_iterator_base<MemoryAccess>; +using const_memoryaccess_def_iterator = + memoryaccess_def_iterator_base<const MemoryAccess>; + +// \brief The base for all memory accesses. All memory accesses in a block are +// linked together using an intrusive list. 
+class MemoryAccess + : public User, + public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>, + public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>> { +public: + using AllAccessType = + ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>; + using DefsOnlyType = + ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>; + + // Methods for support type inquiry through isa, cast, and + // dyn_cast + static inline bool classof(const Value *V) { + unsigned ID = V->getValueID(); + return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal; + } + + MemoryAccess(const MemoryAccess &) = delete; + MemoryAccess &operator=(const MemoryAccess &) = delete; + ~MemoryAccess() override; + + void *operator new(size_t, unsigned) = delete; + void *operator new(size_t) = delete; + + BasicBlock *getBlock() const { return Block; } + + virtual void print(raw_ostream &OS) const = 0; + virtual void dump() const; + + /// \brief The user iterators for a memory access + typedef user_iterator iterator; + typedef const_user_iterator const_iterator; + + /// \brief This iterator walks over all of the defs in a given + /// MemoryAccess. For MemoryPhi nodes, this walks arguments. For + /// MemoryUse/MemoryDef, this walks the defining access. + memoryaccess_def_iterator defs_begin(); + const_memoryaccess_def_iterator defs_begin() const; + memoryaccess_def_iterator defs_end(); + const_memoryaccess_def_iterator defs_end() const; + + /// \brief Get the iterators for the all access list and the defs only list + /// We default to the all access list. + AllAccessType::self_iterator getIterator() { + return this->AllAccessType::getIterator(); + } + AllAccessType::const_self_iterator getIterator() const { + return this->AllAccessType::getIterator(); + } + AllAccessType::reverse_self_iterator getReverseIterator() { + return this->AllAccessType::getReverseIterator(); + } + AllAccessType::const_reverse_self_iterator getReverseIterator() const { + return this->AllAccessType::getReverseIterator(); + } + DefsOnlyType::self_iterator getDefsIterator() { + return this->DefsOnlyType::getIterator(); + } + DefsOnlyType::const_self_iterator getDefsIterator() const { + return this->DefsOnlyType::getIterator(); + } + DefsOnlyType::reverse_self_iterator getReverseDefsIterator() { + return this->DefsOnlyType::getReverseIterator(); + } + DefsOnlyType::const_reverse_self_iterator getReverseDefsIterator() const { + return this->DefsOnlyType::getReverseIterator(); + } + +protected: + friend class MemorySSA; + friend class MemoryUseOrDef; + friend class MemoryUse; + friend class MemoryDef; + friend class MemoryPhi; + + /// \brief Used by MemorySSA to change the block of a MemoryAccess when it is + /// moved. + void setBlock(BasicBlock *BB) { Block = BB; } + + /// \brief Used for debugging and tracking things about MemoryAccesses. + /// Guaranteed unique among MemoryAccesses, no guarantees otherwise. + virtual unsigned getID() const = 0; + + MemoryAccess(LLVMContext &C, unsigned Vty, BasicBlock *BB, + unsigned NumOperands) + : User(Type::getVoidTy(C), Vty, nullptr, NumOperands), Block(BB) {} + +private: + BasicBlock *Block; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) { + MA.print(OS); + return OS; +} + +/// \brief Class that has the common methods + fields of memory uses/defs. 
It's +/// a little awkward to have, but there are many cases where we want either a +/// use or def, and there are many cases where uses are needed (defs aren't +/// acceptable), and vice-versa. +/// +/// This class should never be instantiated directly; make a MemoryUse or +/// MemoryDef instead. +class MemoryUseOrDef : public MemoryAccess { +public: + void *operator new(size_t, unsigned) = delete; + void *operator new(size_t) = delete; + + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + /// \brief Get the instruction that this MemoryUse represents. + Instruction *getMemoryInst() const { return MemoryInst; } + + /// \brief Get the access that produces the memory state used by this Use. + MemoryAccess *getDefiningAccess() const { return getOperand(0); } + + static inline bool classof(const Value *MA) { + return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal; + } + + // Sadly, these have to be public because they are needed in some of the + // iterators. + virtual bool isOptimized() const = 0; + virtual MemoryAccess *getOptimized() const = 0; + virtual void setOptimized(MemoryAccess *) = 0; + + /// \brief Reset the ID of what this MemoryUse was optimized to, causing it to + /// be rewalked by the walker if necessary. + /// This really should only be called by tests. + virtual void resetOptimized() = 0; + +protected: + friend class MemorySSA; + friend class MemorySSAUpdater; + MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty, + Instruction *MI, BasicBlock *BB) + : MemoryAccess(C, Vty, BB, 1), MemoryInst(MI) { + setDefiningAccess(DMA); + } + void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false) { + if (!Optimized) { + setOperand(0, DMA); + return; + } + setOptimized(DMA); + } + +private: + Instruction *MemoryInst; +}; + +template <> +struct OperandTraits<MemoryUseOrDef> + : public FixedNumOperandTraits<MemoryUseOrDef, 1> {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess) + +/// \brief Represents read-only accesses to memory +/// +/// In particular, the set of Instructions that will be represented by +/// MemoryUse's is exactly the set of Instructions for which +/// AliasAnalysis::getModRefInfo returns "Ref". 
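A small sketch of the MemoryUse/MemoryDef split described above, from the point of view of a client that only wants the memory state an instruction observes (the helper is invented, not part of this change).

#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

// Returns the MemoryAccess whose state I reads or clobbers, i.e. operand 0 of
// its MemoryUse ("Ref" instructions) or MemoryDef ("Mod"/"ModRef"), or null
// if MemorySSA recorded no access for I at all.
static MemoryAccess *getReachingMemoryState(MemorySSA &MSSA,
                                            const Instruction &I) {
  MemoryUseOrDef *MUD = MSSA.getMemoryAccess(&I);
  return MUD ? MUD->getDefiningAccess() : nullptr;
}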
+class MemoryUse final : public MemoryUseOrDef { +public: + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB) + : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB), OptimizedID(0) {} + + // allocate space for exactly one operand + void *operator new(size_t s) { return User::operator new(s, 1); } + void *operator new(size_t, unsigned) = delete; + + static inline bool classof(const Value *MA) { + return MA->getValueID() == MemoryUseVal; + } + + void print(raw_ostream &OS) const override; + + virtual void setOptimized(MemoryAccess *DMA) override { + OptimizedID = DMA->getID(); + setOperand(0, DMA); + } + + virtual bool isOptimized() const override { + return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID(); + } + + virtual MemoryAccess *getOptimized() const override { + return getDefiningAccess(); + } + virtual void resetOptimized() override { + OptimizedID = INVALID_MEMORYACCESS_ID; + } + +protected: + friend class MemorySSA; + + unsigned getID() const override { + llvm_unreachable("MemoryUses do not have IDs"); + } + +private: + unsigned int OptimizedID; +}; + +template <> +struct OperandTraits<MemoryUse> : public FixedNumOperandTraits<MemoryUse, 1> {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUse, MemoryAccess) + +/// \brief Represents a read-write access to memory, whether it is a must-alias, +/// or a may-alias. +/// +/// In particular, the set of Instructions that will be represented by +/// MemoryDef's is exactly the set of Instructions for which +/// AliasAnalysis::getModRefInfo returns "Mod" or "ModRef". +/// Note that, in order to provide def-def chains, all defs also have a use +/// associated with them. This use points to the nearest reaching +/// MemoryDef/MemoryPhi. +class MemoryDef final : public MemoryUseOrDef { +public: + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB, + unsigned Ver) + : MemoryUseOrDef(C, DMA, MemoryDefVal, MI, BB), ID(Ver), + Optimized(nullptr), OptimizedID(INVALID_MEMORYACCESS_ID) {} + + // allocate space for exactly one operand + void *operator new(size_t s) { return User::operator new(s, 1); } + void *operator new(size_t, unsigned) = delete; + + static inline bool classof(const Value *MA) { + return MA->getValueID() == MemoryDefVal; + } + + virtual void setOptimized(MemoryAccess *MA) override { + Optimized = MA; + OptimizedID = getDefiningAccess()->getID(); + } + virtual MemoryAccess *getOptimized() const override { return Optimized; } + virtual bool isOptimized() const override { + return getOptimized() && getDefiningAccess() && + OptimizedID == getDefiningAccess()->getID(); + } + virtual void resetOptimized() override { + OptimizedID = INVALID_MEMORYACCESS_ID; + } + + void print(raw_ostream &OS) const override; + +protected: + friend class MemorySSA; + + unsigned getID() const override { return ID; } + +private: + const unsigned ID; + MemoryAccess *Optimized; + unsigned int OptimizedID; +}; + +template <> +struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 1> {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryDef, MemoryAccess) + +/// \brief Represents phi nodes for memory accesses. +/// +/// These have the same semantic as regular phi nodes, with the exception that +/// only one phi will ever exist in a given basic block. 
+/// Guaranteeing one phi per block means guaranteeing there is only ever one +/// valid reaching MemoryDef/MemoryPHI along each path to the phi node. +/// This is ensured by not allowing disambiguation of the RHS of a MemoryDef or +/// a MemoryPhi's operands. +/// That is, given +/// if (a) { +/// store %a +/// store %b +/// } +/// it *must* be transformed into +/// if (a) { +/// 1 = MemoryDef(liveOnEntry) +/// store %a +/// 2 = MemoryDef(1) +/// store %b +/// } +/// and *not* +/// if (a) { +/// 1 = MemoryDef(liveOnEntry) +/// store %a +/// 2 = MemoryDef(liveOnEntry) +/// store %b +/// } +/// even if the two stores do not conflict. Otherwise, both 1 and 2 reach the +/// end of the branch, and if there are not two phi nodes, one will be +/// disconnected completely from the SSA graph below that point. +/// Because MemoryUse's do not generate new definitions, they do not have this +/// issue. +class MemoryPhi final : public MemoryAccess { + // allocate space for exactly zero operands + void *operator new(size_t s) { return User::operator new(s); } + +public: + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0) + : MemoryAccess(C, MemoryPhiVal, BB, 0), ID(Ver), ReservedSpace(NumPreds) { + allocHungoffUses(ReservedSpace); + } + + void *operator new(size_t, unsigned) = delete; + + // Block iterator interface. This provides access to the list of incoming + // basic blocks, which parallels the list of incoming values. + typedef BasicBlock **block_iterator; + typedef BasicBlock *const *const_block_iterator; + + block_iterator block_begin() { + auto *Ref = reinterpret_cast<Use::UserRef *>(op_begin() + ReservedSpace); + return reinterpret_cast<block_iterator>(Ref + 1); + } + + const_block_iterator block_begin() const { + const auto *Ref = + reinterpret_cast<const Use::UserRef *>(op_begin() + ReservedSpace); + return reinterpret_cast<const_block_iterator>(Ref + 1); + } + + block_iterator block_end() { return block_begin() + getNumOperands(); } + + const_block_iterator block_end() const { + return block_begin() + getNumOperands(); + } + + iterator_range<block_iterator> blocks() { + return make_range(block_begin(), block_end()); + } + + iterator_range<const_block_iterator> blocks() const { + return make_range(block_begin(), block_end()); + } + + op_range incoming_values() { return operands(); } + + const_op_range incoming_values() const { return operands(); } + + /// \brief Return the number of incoming edges + unsigned getNumIncomingValues() const { return getNumOperands(); } + + /// \brief Return incoming value number x + MemoryAccess *getIncomingValue(unsigned I) const { return getOperand(I); } + void setIncomingValue(unsigned I, MemoryAccess *V) { + assert(V && "PHI node got a null value!"); + setOperand(I, V); + } + static unsigned getOperandNumForIncomingValue(unsigned I) { return I; } + static unsigned getIncomingValueNumForOperand(unsigned I) { return I; } + + /// \brief Return incoming basic block number @p i. + BasicBlock *getIncomingBlock(unsigned I) const { return block_begin()[I]; } + + /// \brief Return incoming basic block corresponding + /// to an operand of the PHI. + BasicBlock *getIncomingBlock(const Use &U) const { + assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?"); + return getIncomingBlock(unsigned(&U - op_begin())); + } + + /// \brief Return incoming basic block corresponding + /// to value use iterator. 
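An illustrative loop over the per-edge accessors above, printing which memory state reaches a MemoryPhi along each incoming CFG edge (invented helper, not part of this change).

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void printIncomingMemoryStates(const MemoryPhi &MP) {
  for (unsigned I = 0, E = MP.getNumIncomingValues(); I != E; ++I)
    errs() << "  from " << MP.getIncomingBlock(I)->getName() << ": "
           << *MP.getIncomingValue(I) << "\n";
}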
+ BasicBlock *getIncomingBlock(MemoryAccess::const_user_iterator I) const { + return getIncomingBlock(I.getUse()); + } + + void setIncomingBlock(unsigned I, BasicBlock *BB) { + assert(BB && "PHI node got a null basic block!"); + block_begin()[I] = BB; + } + + /// \brief Add an incoming value to the end of the PHI list + void addIncoming(MemoryAccess *V, BasicBlock *BB) { + if (getNumOperands() == ReservedSpace) + growOperands(); // Get more space! + // Initialize some new operands. + setNumHungOffUseOperands(getNumOperands() + 1); + setIncomingValue(getNumOperands() - 1, V); + setIncomingBlock(getNumOperands() - 1, BB); + } + + /// \brief Return the first index of the specified basic + /// block in the value list for this PHI. Returns -1 if no instance. + int getBasicBlockIndex(const BasicBlock *BB) const { + for (unsigned I = 0, E = getNumOperands(); I != E; ++I) + if (block_begin()[I] == BB) + return I; + return -1; + } + + Value *getIncomingValueForBlock(const BasicBlock *BB) const { + int Idx = getBasicBlockIndex(BB); + assert(Idx >= 0 && "Invalid basic block argument!"); + return getIncomingValue(Idx); + } + + static inline bool classof(const Value *V) { + return V->getValueID() == MemoryPhiVal; + } + + void print(raw_ostream &OS) const override; + +protected: + friend class MemorySSA; + + /// \brief this is more complicated than the generic + /// User::allocHungoffUses, because we have to allocate Uses for the incoming + /// values and pointers to the incoming blocks, all in one allocation. + void allocHungoffUses(unsigned N) { + User::allocHungoffUses(N, /* IsPhi */ true); + } + + unsigned getID() const final { return ID; } + +private: + // For debugging only + const unsigned ID; + unsigned ReservedSpace; + + /// \brief This grows the operand list in response to a push_back style of + /// operation. This grows the number of ops by 1.5 times. + void growOperands() { + unsigned E = getNumOperands(); + // 2 op PHI nodes are VERY common, so reserve at least enough for that. + ReservedSpace = std::max(E + E / 2, 2u); + growHungoffUses(ReservedSpace, /* IsPhi */ true); + } +}; + +template <> struct OperandTraits<MemoryPhi> : public HungoffOperandTraits<2> {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess) + +class MemorySSAWalker; + +/// \brief Encapsulates MemorySSA, including all data associated with memory +/// accesses. +class MemorySSA { +public: + MemorySSA(Function &, AliasAnalysis *, DominatorTree *); + ~MemorySSA(); + + MemorySSAWalker *getWalker(); + + /// \brief Given a memory Mod/Ref'ing instruction, get the MemorySSA + /// access associated with it. If passed a basic block gets the memory phi + /// node that exists for that block, if there is one. Otherwise, this will get + /// a MemoryUseOrDef. + MemoryUseOrDef *getMemoryAccess(const Instruction *) const; + MemoryPhi *getMemoryAccess(const BasicBlock *BB) const; + + void dump() const; + void print(raw_ostream &) const; + + /// \brief Return true if \p MA represents the live on entry value + /// + /// Loads and stores from pointer arguments and other global values may be + /// defined by memory operations that do not occur in the current function, so + /// they may be live on entry to the function. MemorySSA represents such + /// memory state by the live on entry definition, which is guaranteed to occur + /// before any other memory access in the function. 
+ inline bool isLiveOnEntryDef(const MemoryAccess *MA) const { + return MA == LiveOnEntryDef.get(); + } + + inline MemoryAccess *getLiveOnEntryDef() const { + return LiveOnEntryDef.get(); + } + + // Sadly, iplists, by default, owns and deletes pointers added to the + // list. It's not currently possible to have two iplists for the same type, + // where one owns the pointers, and one does not. This is because the traits + // are per-type, not per-tag. If this ever changes, we should make the + // DefList an iplist. + using AccessList = iplist<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>; + using DefsList = + simple_ilist<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>; + + /// \brief Return the list of MemoryAccess's for a given basic block. + /// + /// This list is not modifiable by the user. + const AccessList *getBlockAccesses(const BasicBlock *BB) const { + return getWritableBlockAccesses(BB); + } + + /// \brief Return the list of MemoryDef's and MemoryPhi's for a given basic + /// block. + /// + /// This list is not modifiable by the user. + const DefsList *getBlockDefs(const BasicBlock *BB) const { + return getWritableBlockDefs(BB); + } + + /// \brief Given two memory accesses in the same basic block, determine + /// whether MemoryAccess \p A dominates MemoryAccess \p B. + bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const; + + /// \brief Given two memory accesses in potentially different blocks, + /// determine whether MemoryAccess \p A dominates MemoryAccess \p B. + bool dominates(const MemoryAccess *A, const MemoryAccess *B) const; + + /// \brief Given a MemoryAccess and a Use, determine whether MemoryAccess \p A + /// dominates Use \p B. + bool dominates(const MemoryAccess *A, const Use &B) const; + + /// \brief Verify that MemorySSA is self consistent (IE definitions dominate + /// all uses, uses appear in the right places). This is used by unit tests. + void verifyMemorySSA() const; + + /// Used in various insertion functions to specify whether we are talking + /// about the beginning or end of a block. + enum InsertionPlace { Beginning, End }; + +protected: + // Used by Memory SSA annotater, dumpers, and wrapper pass + friend class MemorySSAAnnotatedWriter; + friend class MemorySSAPrinterLegacyPass; + friend class MemorySSAUpdater; + + void verifyDefUses(Function &F) const; + void verifyDomination(Function &F) const; + void verifyOrdering(Function &F) const; + + // This is used by the use optimizer and updater. + AccessList *getWritableBlockAccesses(const BasicBlock *BB) const { + auto It = PerBlockAccesses.find(BB); + return It == PerBlockAccesses.end() ? nullptr : It->second.get(); + } + + // This is used by the use optimizer and updater. + DefsList *getWritableBlockDefs(const BasicBlock *BB) const { + auto It = PerBlockDefs.find(BB); + return It == PerBlockDefs.end() ? nullptr : It->second.get(); + } + + // These is used by the updater to perform various internal MemorySSA + // machinsations. They do not always leave the IR in a correct state, and + // relies on the updater to fixup what it breaks, so it is not public. + + void moveTo(MemoryUseOrDef *What, BasicBlock *BB, AccessList::iterator Where); + void moveTo(MemoryUseOrDef *What, BasicBlock *BB, InsertionPlace Point); + // Rename the dominator tree branch rooted at BB. 
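A sketch of walking the per-block lists exposed above; this visits every MemoryPhi, MemoryDef, and MemoryUse of a block in the order MemorySSA keeps them (invented helper, not part of this change).

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void dumpBlockAccesses(const MemorySSA &MSSA, const BasicBlock &BB) {
  // getBlockAccesses returns null when the block has no accesses at all.
  if (const auto *Accesses = MSSA.getBlockAccesses(&BB))
    for (const MemoryAccess &MA : *Accesses)
      errs() << MA << "\n";
}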
+ void renamePass(BasicBlock *BB, MemoryAccess *IncomingVal, + SmallPtrSetImpl<BasicBlock *> &Visited) { + renamePass(DT->getNode(BB), IncomingVal, Visited, true, true); + } + void removeFromLookups(MemoryAccess *); + void removeFromLists(MemoryAccess *, bool ShouldDelete = true); + void insertIntoListsForBlock(MemoryAccess *, const BasicBlock *, + InsertionPlace); + void insertIntoListsBefore(MemoryAccess *, const BasicBlock *, + AccessList::iterator); + MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *); + +private: + class CachingWalker; + class OptimizeUses; + + CachingWalker *getWalkerImpl(); + void buildMemorySSA(); + void optimizeUses(); + + void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const; + using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>; + using DefsMap = DenseMap<const BasicBlock *, std::unique_ptr<DefsList>>; + + void + determineInsertionPoint(const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks); + void markUnreachableAsLiveOnEntry(BasicBlock *BB); + bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const; + MemoryPhi *createMemoryPhi(BasicBlock *BB); + MemoryUseOrDef *createNewAccess(Instruction *); + MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace); + void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &, + const DenseMap<const BasicBlock *, unsigned int> &); + MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool); + void renameSuccessorPhis(BasicBlock *, MemoryAccess *, bool); + void renamePass(DomTreeNode *, MemoryAccess *IncomingVal, + SmallPtrSetImpl<BasicBlock *> &Visited, + bool SkipVisited = false, bool RenameAllUses = false); + AccessList *getOrCreateAccessList(const BasicBlock *); + DefsList *getOrCreateDefsList(const BasicBlock *); + void renumberBlock(const BasicBlock *) const; + AliasAnalysis *AA; + DominatorTree *DT; + Function &F; + + // Memory SSA mappings + DenseMap<const Value *, MemoryAccess *> ValueToMemoryAccess; + // These two mappings contain the main block to access/def mappings for + // MemorySSA. The list contained in PerBlockAccesses really owns all the + // MemoryAccesses. + // Both maps maintain the invariant that if a block is found in them, the + // corresponding list is not empty, and if a block is not found in them, the + // corresponding list is empty. + AccessMap PerBlockAccesses; + DefsMap PerBlockDefs; + std::unique_ptr<MemoryAccess> LiveOnEntryDef; + + // Domination mappings + // Note that the numbering is local to a block, even though the map is + // global. + mutable SmallPtrSet<const BasicBlock *, 16> BlockNumberingValid; + mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering; + + // Memory SSA building info + std::unique_ptr<CachingWalker> Walker; + unsigned NextID; +}; + +// Internal MemorySSA utils, for use by MemorySSA classes and walkers +class MemorySSAUtil { +protected: + friend class MemorySSAWalker; + friend class GVNHoist; + // This function should not be used by new passes. + static bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, + AliasAnalysis &AA); +}; + +// This pass does eager building and then printing of MemorySSA. It is used by +// the tests to be able to build, dump, and verify Memory SSA. +class MemorySSAPrinterLegacyPass : public FunctionPass { +public: + MemorySSAPrinterLegacyPass(); + + bool runOnFunction(Function &) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + static char ID; +}; + +/// An analysis that produces \c MemorySSA for a function. 
+/// +class MemorySSAAnalysis : public AnalysisInfoMixin<MemorySSAAnalysis> { + friend AnalysisInfoMixin<MemorySSAAnalysis>; + + static AnalysisKey Key; + +public: + // Wrap MemorySSA result to ensure address stability of internal MemorySSA + // pointers after construction. Use a wrapper class instead of plain + // unique_ptr<MemorySSA> to avoid build breakage on MSVC. + struct Result { + Result(std::unique_ptr<MemorySSA> &&MSSA) : MSSA(std::move(MSSA)) {} + MemorySSA &getMSSA() { return *MSSA.get(); } + + std::unique_ptr<MemorySSA> MSSA; + }; + + Result run(Function &F, FunctionAnalysisManager &AM); +}; + +/// \brief Printer pass for \c MemorySSA. +class MemorySSAPrinterPass : public PassInfoMixin<MemorySSAPrinterPass> { + raw_ostream &OS; + +public: + explicit MemorySSAPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// \brief Verifier pass for \c MemorySSA. +struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// \brief Legacy analysis pass which computes \c MemorySSA. +class MemorySSAWrapperPass : public FunctionPass { +public: + MemorySSAWrapperPass(); + + static char ID; + + bool runOnFunction(Function &) override; + void releaseMemory() override; + MemorySSA &getMSSA() { return *MSSA; } + const MemorySSA &getMSSA() const { return *MSSA; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + void verifyAnalysis() const override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; + +private: + std::unique_ptr<MemorySSA> MSSA; +}; + +/// \brief This is the generic walker interface for walkers of MemorySSA. +/// Walkers are used to be able to further disambiguate the def-use chains +/// MemorySSA gives you, or otherwise produce better info than MemorySSA gives +/// you. +/// In particular, while the def-use chains provide basic information, and are +/// guaranteed to give, for example, the nearest may-aliasing MemoryDef for a +/// MemoryUse as AliasAnalysis considers it, a user mant want better or other +/// information. In particular, they may want to use SCEV info to further +/// disambiguate memory accesses, or they may want the nearest dominating +/// may-aliasing MemoryDef for a call or a store. This API enables a +/// standardized interface to getting and using that info. +class MemorySSAWalker { +public: + MemorySSAWalker(MemorySSA *); + virtual ~MemorySSAWalker() = default; + + using MemoryAccessSet = SmallVector<MemoryAccess *, 8>; + + /// \brief Given a memory Mod/Ref/ModRef'ing instruction, calling this + /// will give you the nearest dominating MemoryAccess that Mod's the location + /// the instruction accesses (by skipping any def which AA can prove does not + /// alias the location(s) accessed by the instruction given). + /// + /// Note that this will return a single access, and it must dominate the + /// Instruction, so if an operand of a MemoryPhi node Mod's the instruction, + /// this will return the MemoryPhi, not the operand. This means that + /// given: + /// if (a) { + /// 1 = MemoryDef(liveOnEntry) + /// store %a + /// } else { + /// 2 = MemoryDef(liveOnEntry) + /// store %b + /// } + /// 3 = MemoryPhi(2, 1) + /// MemoryUse(3) + /// load %a + /// + /// calling this API on load(%a) will return the MemoryPhi, not the MemoryDef + /// in the if (a) branch. 
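The walker query documented above, wrapped in an invented helper to show the common "is this only clobbered by the entry state" pattern; obtaining MSSA from MemorySSAAnalysis or MemorySSAWrapperPass is left to the caller.

#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

// I must be an instruction MemorySSA knows about (it reads or writes memory).
static bool isClobberedOnlyByEntryState(MemorySSA &MSSA, Instruction &I) {
  MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(&I);
  // liveOnEntry means no store in this function can affect what I observes.
  return MSSA.isLiveOnEntryDef(Clobber);
}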
+ MemoryAccess *getClobberingMemoryAccess(const Instruction *I) { + MemoryAccess *MA = MSSA->getMemoryAccess(I); + assert(MA && "Handed an instruction that MemorySSA doesn't recognize?"); + return getClobberingMemoryAccess(MA); + } + + /// Does the same thing as getClobberingMemoryAccess(const Instruction *I), + /// but takes a MemoryAccess instead of an Instruction. + virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) = 0; + + /// \brief Given a potentially clobbering memory access and a new location, + /// calling this will give you the nearest dominating clobbering MemoryAccess + /// (by skipping non-aliasing def links). + /// + /// This version of the function is mainly used to disambiguate phi translated + /// pointers, where the value of a pointer may have changed from the initial + /// memory access. Note that this expects to be handed either a MemoryUse, + /// or an already potentially clobbering access. Unlike the above API, if + /// given a MemoryDef that clobbers the pointer as the starting access, it + /// will return that MemoryDef, whereas the above would return the clobber + /// starting from the use side of the memory def. + virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + const MemoryLocation &) = 0; + + /// \brief Given a memory access, invalidate anything this walker knows about + /// that access. + /// This API is used by walkers that store information to perform basic cache + /// invalidation. This will be called by MemorySSA at appropriate times for + /// the walker it uses or returns. + virtual void invalidateInfo(MemoryAccess *) {} + + virtual void verify(const MemorySSA *MSSA) { assert(MSSA == this->MSSA); } + +protected: + friend class MemorySSA; // For updating MSSA pointer in MemorySSA move + // constructor. + MemorySSA *MSSA; +}; + +/// \brief A MemorySSAWalker that does no alias queries, or anything else. It +/// simply returns the links as they were constructed by the builder. +class DoNothingMemorySSAWalker final : public MemorySSAWalker { +public: + // Keep the overrides below from hiding the Instruction overload of + // getClobberingMemoryAccess. + using MemorySSAWalker::getClobberingMemoryAccess; + + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + const MemoryLocation &) override; +}; + +using MemoryAccessPair = std::pair<MemoryAccess *, MemoryLocation>; +using ConstMemoryAccessPair = std::pair<const MemoryAccess *, MemoryLocation>; + +/// \brief Iterator base class used to implement const and non-const iterators +/// over the defining accesses of a MemoryAccess. +template <class T> +class memoryaccess_def_iterator_base + : public iterator_facade_base<memoryaccess_def_iterator_base<T>, + std::forward_iterator_tag, T, ptrdiff_t, T *, + T *> { + using BaseT = typename memoryaccess_def_iterator_base::iterator_facade_base; + +public: + memoryaccess_def_iterator_base(T *Start) : Access(Start) {} + memoryaccess_def_iterator_base() = default; + + bool operator==(const memoryaccess_def_iterator_base &Other) const { + return Access == Other.Access && (!Access || ArgNo == Other.ArgNo); + } + + // This is a bit ugly, but for MemoryPHI's, unlike PHINodes, you can't get the + // block from the operand in constant time (In a PHINode, the uselist has + // both, so it's just subtraction). We provide it as part of the + // iterator to avoid callers having to linear walk to get the block. 
+ // If the operation becomes constant time on MemoryPHI's, this bit of + // abstraction breaking should be removed. + BasicBlock *getPhiArgBlock() const { + MemoryPhi *MP = dyn_cast<MemoryPhi>(Access); + assert(MP && "Tried to get phi arg block when not iterating over a PHI"); + return MP->getIncomingBlock(ArgNo); + } + typename BaseT::iterator::pointer operator*() const { + assert(Access && "Tried to access past the end of our iterator"); + // Go to the first argument for phis, and the defining access for everything + // else. + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) + return MP->getIncomingValue(ArgNo); + return cast<MemoryUseOrDef>(Access)->getDefiningAccess(); + } + using BaseT::operator++; + memoryaccess_def_iterator &operator++() { + assert(Access && "Hit end of iterator"); + if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) { + if (++ArgNo >= MP->getNumIncomingValues()) { + ArgNo = 0; + Access = nullptr; + } + } else { + Access = nullptr; + } + return *this; + } + +private: + T *Access = nullptr; + unsigned ArgNo = 0; +}; + +inline memoryaccess_def_iterator MemoryAccess::defs_begin() { + return memoryaccess_def_iterator(this); +} + +inline const_memoryaccess_def_iterator MemoryAccess::defs_begin() const { + return const_memoryaccess_def_iterator(this); +} + +inline memoryaccess_def_iterator MemoryAccess::defs_end() { + return memoryaccess_def_iterator(); +} + +inline const_memoryaccess_def_iterator MemoryAccess::defs_end() const { + return const_memoryaccess_def_iterator(); +} + +/// \brief GraphTraits for a MemoryAccess, which walks defs in the normal case, +/// and uses in the inverse case. +template <> struct GraphTraits<MemoryAccess *> { + using NodeRef = MemoryAccess *; + using ChildIteratorType = memoryaccess_def_iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { return N->defs_begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->defs_end(); } +}; + +template <> struct GraphTraits<Inverse<MemoryAccess *>> { + using NodeRef = MemoryAccess *; + using ChildIteratorType = MemoryAccess::iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { return N->user_begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->user_end(); } +}; + +/// \brief Provide an iterator that walks defs, giving both the memory access, +/// and the current pointer location, updating the pointer location as it +/// changes due to phi node translation. +/// +/// This iterator, while somewhat specialized, is what most clients actually +/// want when walking upwards through MemorySSA def chains. It takes a pair of +/// <MemoryAccess,MemoryLocation>, and walks defs, properly translating the +/// memory location through phi nodes for the user. 
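An invented consumer of the upward_defs helpers defined below; each step pairs an immediate defining access with the location, phi-translated where that succeeds.

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void printImmediateDefs(MemoryAccess *Start, const MemoryLocation &Loc) {
  // One pair for a MemoryUse/MemoryDef; one pair per incoming edge for a Phi.
  // P.second is Loc, re-pointed through phi translation where that succeeded.
  for (const MemoryAccessPair &P : upward_defs({Start, Loc}))
    errs() << *P.first << "\n";
}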
+class upward_defs_iterator + : public iterator_facade_base<upward_defs_iterator, + std::forward_iterator_tag, + const MemoryAccessPair> { + using BaseT = upward_defs_iterator::iterator_facade_base; + +public: + upward_defs_iterator(const MemoryAccessPair &Info) + : DefIterator(Info.first), Location(Info.second), + OriginalAccess(Info.first) { + CurrentPair.first = nullptr; + + WalkingPhi = Info.first && isa<MemoryPhi>(Info.first); + fillInCurrentPair(); + } + + upward_defs_iterator() { CurrentPair.first = nullptr; } + + bool operator==(const upward_defs_iterator &Other) const { + return DefIterator == Other.DefIterator; + } + + BaseT::iterator::reference operator*() const { + assert(DefIterator != OriginalAccess->defs_end() && + "Tried to access past the end of our iterator"); + return CurrentPair; + } + + using BaseT::operator++; + upward_defs_iterator &operator++() { + assert(DefIterator != OriginalAccess->defs_end() && + "Tried to access past the end of the iterator"); + ++DefIterator; + if (DefIterator != OriginalAccess->defs_end()) + fillInCurrentPair(); + return *this; + } + + BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); } + +private: + void fillInCurrentPair() { + CurrentPair.first = *DefIterator; + if (WalkingPhi && Location.Ptr) { + PHITransAddr Translator( + const_cast<Value *>(Location.Ptr), + OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr); + if (!Translator.PHITranslateValue(OriginalAccess->getBlock(), + DefIterator.getPhiArgBlock(), nullptr, + false)) + if (Translator.getAddr() != Location.Ptr) { + CurrentPair.second = Location.getWithNewPtr(Translator.getAddr()); + return; + } + } + CurrentPair.second = Location; + } + + MemoryAccessPair CurrentPair; + memoryaccess_def_iterator DefIterator; + MemoryLocation Location; + MemoryAccess *OriginalAccess = nullptr; + bool WalkingPhi = false; +}; + +inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair) { + return upward_defs_iterator(Pair); +} + +inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); } + +inline iterator_range<upward_defs_iterator> +upward_defs(const MemoryAccessPair &Pair) { + return make_range(upward_defs_begin(Pair), upward_defs_end()); +} + +/// Walks the defining accesses of MemoryDefs. Stops after we hit something that +/// has no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when +/// comparing against a null def_chain_iterator, this will compare equal only +/// after walking said Phi/liveOnEntry. +/// +/// The UseOptimizedChain flag specifies whether to walk the clobbering +/// access chain, or all the accesses. +/// +/// Normally, MemoryDef are all just def/use linked together, so a def_chain on +/// a MemoryDef will walk all MemoryDefs above it in the program until it hits +/// a phi node. The optimized chain walks the clobbering access of a store. +/// So if you are just trying to find, given a store, what the next +/// thing that would clobber the same memory is, you want the optimized chain. +template <class T, bool UseOptimizedChain = false> +struct def_chain_iterator + : public iterator_facade_base<def_chain_iterator<T, UseOptimizedChain>, + std::forward_iterator_tag, MemoryAccess *> { + def_chain_iterator() : MA(nullptr) {} + def_chain_iterator(T MA) : MA(MA) {} + + T operator*() const { return MA; } + + def_chain_iterator &operator++() { + // N.B. liveOnEntry has a null defining access. 
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) { + if (UseOptimizedChain && MUD->isOptimized()) + MA = MUD->getOptimized(); + else + MA = MUD->getDefiningAccess(); + } else { + MA = nullptr; + } + + return *this; + } + + bool operator==(const def_chain_iterator &O) const { return MA == O.MA; } + +private: + T MA; +}; + +template <class T> +inline iterator_range<def_chain_iterator<T>> +def_chain(T MA, MemoryAccess *UpTo = nullptr) { +#ifdef EXPENSIVE_CHECKS + assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator<T>()) && + "UpTo isn't in the def chain!"); +#endif + return make_range(def_chain_iterator<T>(MA), def_chain_iterator<T>(UpTo)); +} + +template <class T> +inline iterator_range<def_chain_iterator<T, true>> optimized_def_chain(T MA) { + return make_range(def_chain_iterator<T, true>(MA), + def_chain_iterator<T, true>(nullptr)); +} + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MEMORYSSA_H diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h new file mode 100644 index 000000000000..d30eeeaa95b6 --- /dev/null +++ b/include/llvm/Analysis/MemorySSAUpdater.h @@ -0,0 +1,153 @@ +//===- MemorySSAUpdater.h - Memory SSA Updater-------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// \brief An automatic updater for MemorySSA that handles arbitrary insertion, +// deletion, and moves. It performs phi insertion where necessary, and +// automatically updates the MemorySSA IR to be correct. +// While updating loads or removing instructions is often easy enough to not +// need this, updating stores should generally not be attemped outside this +// API. +// +// Basic API usage: +// Create the memory access you want for the instruction (this is mainly so +// we know where it is, without having to duplicate the entire set of create +// functions MemorySSA supports). +// Call insertDef or insertUse depending on whether it's a MemoryUse or a +// MemoryDef. +// That's it. +// +// For moving, first, move the instruction itself using the normal SSA +// instruction moving API, then just call moveBefore, moveAfter,or moveTo with +// the right arguments. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H +#define LLVM_ANALYSIS_MEMORYSSAUPDATER_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Analysis/MemorySSA.h" + +namespace llvm { + +class Function; +class Instruction; +class MemoryAccess; +class LLVMContext; +class raw_ostream; + +class MemorySSAUpdater { +private: + MemorySSA *MSSA; + SmallVector<MemoryPhi *, 8> InsertedPHIs; + SmallPtrSet<BasicBlock *, 8> VisitedBlocks; + +public: + MemorySSAUpdater(MemorySSA *MSSA) : MSSA(MSSA) {} + /// Insert a definition into the MemorySSA IR. RenameUses will rename any use + /// below the new def block (and any inserted phis). 
RenameUses should be set + /// to true if the definition may cause new aliases for loads below it. This + /// is not the case for hoisting or sinking or other forms of code *movement*. + /// It *is* the case for straight code insertion. + /// For example: + /// store a + /// if (foo) { } + /// load a + /// + /// Moving the store into the if block, and calling insertDef, does not + /// require RenameUses. + /// However, changing it to: + /// store a + /// if (foo) { store b } + /// load a + /// Where a mayalias b, *does* require RenameUses be set to true. + void insertDef(MemoryDef *Def, bool RenameUses = false); + void insertUse(MemoryUse *Use); + void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where); + void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where); + void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, + MemorySSA::InsertionPlace Where); + + // The below are utility functions. Other than creation of accesses to pass + // to insertDef, and removeAccess to remove accesses, you should generally + // not attempt to update memoryssa yourself. It is very non-trivial to get + // the edge cases right, and the above calls already operate in near-optimal + // time bounds. + + /// \brief Create a MemoryAccess in MemorySSA at a specified point in a block, + /// with a specified clobbering definition. + /// + /// Returns the new MemoryAccess. + /// This should be called when a memory instruction is created that is being + /// used to replace an existing memory instruction. It will *not* create PHI + /// nodes, or verify the clobbering definition. The insertion place is used + /// solely to determine where in the memoryssa access lists the instruction + /// will be placed. The caller is expected to keep ordering the same as + /// instructions. + /// It will return the new MemoryAccess. + /// Note: If a MemoryAccess already exists for I, this function will make it + /// inaccessible and it *must* have removeMemoryAccess called on it. + MemoryAccess *createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, + const BasicBlock *BB, + MemorySSA::InsertionPlace Point); + + /// \brief Create a MemoryAccess in MemorySSA before or after an existing + /// MemoryAccess. + /// + /// Returns the new MemoryAccess. + /// This should be called when a memory instruction is created that is being + /// used to replace an existing memory instruction. It will *not* create PHI + /// nodes, or verify the clobbering definition. + /// + /// Note: If a MemoryAccess already exists for I, this function will make it + /// inaccessible and it *must* have removeMemoryAccess called on it. + MemoryUseOrDef *createMemoryAccessBefore(Instruction *I, + MemoryAccess *Definition, + MemoryUseOrDef *InsertPt); + MemoryUseOrDef *createMemoryAccessAfter(Instruction *I, + MemoryAccess *Definition, + MemoryAccess *InsertPt); + + /// \brief Remove a MemoryAccess from MemorySSA, including updating all + /// definitions and uses. + /// This should be called when a memory instruction that has a MemoryAccess + /// associated with it is erased from the program. For example, if a store or + /// load is simply erased (not replaced), removeMemoryAccess should be called + /// on the MemoryAccess for that store/load. + void removeMemoryAccess(MemoryAccess *); + +private: + // Move What before Where in the MemorySSA IR. 
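An invented end-to-end use of the updater API described above, for the simplest case of erasing a load that turned out to be dead; the MemoryAccess is removed before the IR instruction itself.

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void eraseDeadLoad(MemorySSA &MSSA, LoadInst *LI) {
  MemorySSAUpdater Updater(&MSSA);
  // Drop the MemoryUse for LI first, then the instruction itself.
  if (MemoryAccess *MA = MSSA.getMemoryAccess(LI))
    Updater.removeMemoryAccess(MA);
  LI->eraseFromParent();
}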
+ template <class WhereType> + void moveTo(MemoryUseOrDef *What, BasicBlock *BB, WhereType Where); + MemoryAccess *getPreviousDef(MemoryAccess *); + MemoryAccess *getPreviousDefInBlock(MemoryAccess *); + MemoryAccess *getPreviousDefFromEnd(BasicBlock *); + MemoryAccess *getPreviousDefRecursive(BasicBlock *); + MemoryAccess *recursePhi(MemoryAccess *Phi); + template <class RangeType> + MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands); + void fixupDefs(const SmallVectorImpl<MemoryAccess *> &); +}; +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MEMORYSSAUPDATER_H diff --git a/include/llvm/Analysis/ObjectUtils.h b/include/llvm/Analysis/ObjectUtils.h new file mode 100644 index 000000000000..2ad3b1717009 --- /dev/null +++ b/include/llvm/Analysis/ObjectUtils.h @@ -0,0 +1,42 @@ +//===- Analysis/ObjectUtils.h - analysis utils for object files -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_OBJECT_UTILS_H +#define LLVM_ANALYSIS_OBJECT_UTILS_H + +#include "llvm/IR/GlobalVariable.h" + +namespace llvm { + +/// True if GV can be left out of the object symbol table. This is the case +/// for linkonce_odr values whose address is not significant. While legal, it is +/// not normally profitable to omit them from the .o symbol table. Using this +/// analysis makes sense when the information can be passed down to the linker +/// or we are in LTO. +inline bool canBeOmittedFromSymbolTable(const GlobalValue *GV) { + if (!GV->hasLinkOnceODRLinkage()) + return false; + + // We assume that anyone who sets global unnamed_addr on a non-constant knows + // what they're doing. + if (GV->hasGlobalUnnamedAddr()) + return true; + + // If it is a non constant variable, it needs to be uniqued across shared + // objects. + if (auto *Var = dyn_cast<GlobalVariable>(GV)) + if (!Var->isConstant()) + return false; + + return GV->hasAtLeastLocalUnnamedAddr(); +} + +} + +#endif diff --git a/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/include/llvm/Analysis/OptimizationDiagnosticInfo.h index 39269269c244..edd9140a3493 100644 --- a/include/llvm/Analysis/OptimizationDiagnosticInfo.h +++ b/include/llvm/Analysis/OptimizationDiagnosticInfo.h @@ -38,7 +38,7 @@ class Value; /// enabled in the LLVM context. class OptimizationRemarkEmitter { public: - OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI) + OptimizationRemarkEmitter(const Function *F, BlockFrequencyInfo *BFI) : F(F), BFI(BFI) {} /// \brief This variant can be used to generate ORE on demand (without the @@ -52,7 +52,7 @@ public: /// operation since BFI and all its required analyses are computed. This is /// for example useful for CGSCC passes that can't use function analyses /// passes in the old PM. - OptimizationRemarkEmitter(Function *F); + OptimizationRemarkEmitter(const Function *F); OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg) : F(Arg.F), BFI(Arg.BFI) {} @@ -63,136 +63,16 @@ public: return *this; } - /// The new interface to emit remarks. - void emit(DiagnosticInfoOptimizationBase &OptDiag); - - /// Emit an optimization-applied message. - /// - /// \p PassName is the name of the pass emitting the message. If -Rpass= is - /// given and \p PassName matches the regular expression in -Rpass, then the - /// remark will be emitted. 
\p Fn is the function triggering the remark, \p - /// DLoc is the debug location where the diagnostic is generated. \p V is the - /// IR Value that identifies the code region. \p Msg is the message string to - /// use. - void emitOptimizationRemark(const char *PassName, const DebugLoc &DLoc, - const Value *V, const Twine &Msg); - - /// \brief Same as above but derives the IR Value for the code region and the - /// debug location from the Loop parameter \p L. - void emitOptimizationRemark(const char *PassName, Loop *L, const Twine &Msg); - - /// \brief Same as above but derives the debug location and the code region - /// from the debug location and the basic block of \p Inst, respectively. - void emitOptimizationRemark(const char *PassName, Instruction *Inst, - const Twine &Msg) { - emitOptimizationRemark(PassName, Inst->getDebugLoc(), Inst->getParent(), - Msg); - } - - /// Emit an optimization-missed message. - /// - /// \p PassName is the name of the pass emitting the message. If - /// -Rpass-missed= is given and the name matches the regular expression in - /// -Rpass, then the remark will be emitted. \p DLoc is the debug location - /// where the diagnostic is generated. \p V is the IR Value that identifies - /// the code region. \p Msg is the message string to use. If \p IsVerbose is - /// true, the message is considered verbose and will only be emitted when - /// verbose output is turned on. - void emitOptimizationRemarkMissed(const char *PassName, const DebugLoc &DLoc, - const Value *V, const Twine &Msg, - bool IsVerbose = false); - - /// \brief Same as above but derives the IR Value for the code region and the - /// debug location from the Loop parameter \p L. - void emitOptimizationRemarkMissed(const char *PassName, Loop *L, - const Twine &Msg, bool IsVerbose = false); - - /// \brief Same as above but derives the debug location and the code region - /// from the debug location and the basic block of \p Inst, respectively. - void emitOptimizationRemarkMissed(const char *PassName, Instruction *Inst, - const Twine &Msg, bool IsVerbose = false) { - emitOptimizationRemarkMissed(PassName, Inst->getDebugLoc(), - Inst->getParent(), Msg, IsVerbose); - } - - /// Emit an optimization analysis remark message. - /// - /// \p PassName is the name of the pass emitting the message. If - /// -Rpass-analysis= is given and \p PassName matches the regular expression - /// in -Rpass, then the remark will be emitted. \p DLoc is the debug location - /// where the diagnostic is generated. \p V is the IR Value that identifies - /// the code region. \p Msg is the message string to use. If \p IsVerbose is - /// true, the message is considered verbose and will only be emitted when - /// verbose output is turned on. - void emitOptimizationRemarkAnalysis(const char *PassName, - const DebugLoc &DLoc, const Value *V, - const Twine &Msg, bool IsVerbose = false); - - /// \brief Same as above but derives the IR Value for the code region and the - /// debug location from the Loop parameter \p L. - void emitOptimizationRemarkAnalysis(const char *PassName, Loop *L, - const Twine &Msg, bool IsVerbose = false); + /// Handle invalidation events in the new pass manager. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); - /// \brief Same as above but derives the debug location and the code region - /// from the debug location and the basic block of \p Inst, respectively. 
- void emitOptimizationRemarkAnalysis(const char *PassName, Instruction *Inst, - const Twine &Msg, - bool IsVerbose = false) { - emitOptimizationRemarkAnalysis(PassName, Inst->getDebugLoc(), - Inst->getParent(), Msg, IsVerbose); - } - - /// \brief This variant allows specifying what should be emitted for missed - /// and analysis remarks in one call. - /// - /// \p PassName is the name of the pass emitting the message. If - /// -Rpass-missed= is given and \p PassName matches the regular expression, \p - /// MsgForMissedRemark is emitted. - /// - /// If -Rpass-analysis= is given and \p PassName matches the regular - /// expression, \p MsgForAnalysisRemark is emitted. - /// - /// The debug location and the code region is derived from \p Inst. If \p - /// IsVerbose is true, the message is considered verbose and will only be - /// emitted when verbose output is turned on. - void emitOptimizationRemarkMissedAndAnalysis( - const char *PassName, Instruction *Inst, const Twine &MsgForMissedRemark, - const Twine &MsgForAnalysisRemark, bool IsVerbose = false) { - emitOptimizationRemarkAnalysis(PassName, Inst, MsgForAnalysisRemark, - IsVerbose); - emitOptimizationRemarkMissed(PassName, Inst, MsgForMissedRemark, IsVerbose); - } - - /// \brief Emit an optimization analysis remark related to floating-point - /// non-commutativity. + /// \brief Output the remark via the diagnostic handler and to the + /// optimization record file. /// - /// \p PassName is the name of the pass emitting the message. If - /// -Rpass-analysis= is given and \p PassName matches the regular expression - /// in -Rpass, then the remark will be emitted. \p Fn is the function - /// triggering the remark, \p DLoc is the debug location where the diagnostic - /// is generated.\p V is the IR Value that identifies the code region. \p Msg - /// is the message string to use. - void emitOptimizationRemarkAnalysisFPCommute(const char *PassName, - const DebugLoc &DLoc, - const Value *V, - const Twine &Msg); - - /// \brief Emit an optimization analysis remark related to pointer aliasing. - /// - /// \p PassName is the name of the pass emitting the message. If - /// -Rpass-analysis= is given and \p PassName matches the regular expression - /// in -Rpass, then the remark will be emitted. \p Fn is the function - /// triggering the remark, \p DLoc is the debug location where the diagnostic - /// is generated.\p V is the IR Value that identifies the code region. \p Msg - /// is the message string to use. - void emitOptimizationRemarkAnalysisAliasing(const char *PassName, - const DebugLoc &DLoc, - const Value *V, const Twine &Msg); - - /// \brief Same as above but derives the IR Value for the code region and the - /// debug location from the Loop parameter \p L. - void emitOptimizationRemarkAnalysisAliasing(const char *PassName, Loop *L, - const Twine &Msg); + /// This is the new interface that should be now used rather than the legacy + /// emit* APIs. + void emit(DiagnosticInfoOptimizationBase &OptDiag); /// \brief Whether we allow for extra compile-time budget to perform more /// analysis to produce fewer false positives. @@ -208,7 +88,7 @@ public: } private: - Function *F; + const Function *F; BlockFrequencyInfo *BFI; @@ -220,7 +100,7 @@ private: Optional<uint64_t> computeHotness(const Value *V); /// Similar but use value from \p OptDiag and update hotness there. 
- void computeHotness(DiagnosticInfoOptimizationBase &OptDiag); + void computeHotness(DiagnosticInfoIROptimization &OptDiag); /// \brief Only allow verbose messages if we know we're filtering by hotness /// (BFI is only set in this case). @@ -274,5 +154,11 @@ public: /// \brief Run the analysis pass over a function and produce BFI. Result run(Function &F, FunctionAnalysisManager &AM); }; + +namespace yaml { +template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> { + static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag); +}; +} } #endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h index 34361dac8c16..94ee3b03bb86 100644 --- a/include/llvm/Analysis/PostDominators.h +++ b/include/llvm/Analysis/PostDominators.h @@ -26,6 +26,10 @@ struct PostDominatorTree : public DominatorTreeBase<BasicBlock> { typedef DominatorTreeBase<BasicBlock> Base; PostDominatorTree() : DominatorTreeBase<BasicBlock>(true) {} + + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); }; /// \brief Analysis pass which computes a \c PostDominatorTree. diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h index d7fe76e278e3..1aec35c3e677 100644 --- a/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/include/llvm/Analysis/ProfileSummaryInfo.h @@ -29,6 +29,7 @@ namespace llvm { class BasicBlock; class BlockFrequencyInfo; +class CallSite; class ProfileSummary; /// \brief Analysis providing profile information. /// @@ -44,7 +45,7 @@ class ProfileSummaryInfo { private: Module &M; std::unique_ptr<ProfileSummary> Summary; - void computeSummary(); + bool computeSummary(); void computeThresholds(); // Count thresholds to answer isHotCount and isColdCount queries. Optional<uint64_t> HotCountThreshold, ColdCountThreshold; @@ -53,16 +54,29 @@ public: ProfileSummaryInfo(Module &M) : M(M) {} ProfileSummaryInfo(ProfileSummaryInfo &&Arg) : M(Arg.M), Summary(std::move(Arg.Summary)) {} + /// Returns the profile count for \p CallInst. + static Optional<uint64_t> getProfileCount(const Instruction *CallInst, + BlockFrequencyInfo *BFI); /// \brief Returns true if \p F has hot function entry. bool isFunctionEntryHot(const Function *F); + /// Returns true if \p F has hot function entry or hot call edge. + bool isFunctionHotInCallGraph(const Function *F); /// \brief Returns true if \p F has cold function entry. bool isFunctionEntryCold(const Function *F); + /// Returns true if \p F has cold function entry or cold call edge. + bool isFunctionColdInCallGraph(const Function *F); /// \brief Returns true if count \p C is considered hot. bool isHotCount(uint64_t C); /// \brief Returns true if count \p C is considered cold. bool isColdCount(uint64_t C); /// \brief Returns true if BasicBlock \p B is considered hot. bool isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI); + /// \brief Returns true if BasicBlock \p B is considered cold. + bool isColdBB(const BasicBlock *B, BlockFrequencyInfo *BFI); + /// \brief Returns true if CallSite \p CS is considered hot. + bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); + /// \brief Returns true if CallSite \p CS is considered cold. + bool isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); }; /// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
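The new ProfileSummaryInfo call-site queries declared above (isHotCallSite and isColdCallSite) can be used together with BlockFrequencyInfo when tuning per-call-site decisions. The following is only a minimal usage sketch, not part of this patch: the helper classifyCallSite, the CallSiteHeat enum, and the surrounding policy are invented names for illustration.

// Hypothetical helper, not part of LLVM: illustrates the call-site hotness
// queries added to ProfileSummaryInfo in this change.
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/CallSite.h"

using namespace llvm;

// Rough classification a pass might use, e.g. to adjust an inline threshold.
enum class CallSiteHeat { Hot, Cold, Neutral };

static CallSiteHeat classifyCallSite(ProfileSummaryInfo &PSI,
                                     BlockFrequencyInfo *BFI,
                                     const CallSite &CS) {
  // PSI compares the call site's profile count (scaled via BFI) against the
  // hot/cold thresholds derived from the module's profile summary.
  if (PSI.isHotCallSite(CS, BFI))
    return CallSiteHeat::Hot;
  if (PSI.isColdCallSite(CS, BFI))
    return CallSiteHeat::Cold;
  return CallSiteHeat::Neutral;
}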
diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h index 6e61fc3be384..2fe7c6725266 100644 --- a/include/llvm/Analysis/PtrUseVisitor.h +++ b/include/llvm/Analysis/PtrUseVisitor.h @@ -196,7 +196,10 @@ class PtrUseVisitor : protected InstVisitor<DerivedT>, typedef InstVisitor<DerivedT> Base; public: - PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) {} + PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) { + static_assert(std::is_base_of<PtrUseVisitor, DerivedT>::value, + "Must pass the derived type to this template!"); + } /// \brief Recursively visit the uses of the given pointer. /// \returns An info struct about the pointer. See \c PtrInfo for details. diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index f2f27a137a88..caeb21db613e 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -886,6 +886,10 @@ public: return *this; } + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); + // updateStatistics - Update statistic about created regions. void updateStatistics(Region *R) final; diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 1a93f9aa5fd2..9a50de540f2b 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -543,6 +543,12 @@ private: /// predicate by splitting it into a set of independent predicates. bool ProvingSplitPredicate; + /// Memoized values for the GetMinTrailingZeros + DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache; + + /// Private helper method for the GetMinTrailingZeros method + uint32_t GetMinTrailingZerosImpl(const SCEV *S); + /// Information about the number of loop iterations for which a loop exit's /// branch condition evaluates to the not-taken path. This is a temporary /// pair of exact and max expressions that are eventually summarized in @@ -600,14 +606,14 @@ private: /// Information about the number of times a particular loop exit may be /// reached before exiting the loop. struct ExitNotTakenInfo { - AssertingVH<BasicBlock> ExitingBlock; + PoisoningVH<BasicBlock> ExitingBlock; const SCEV *ExactNotTaken; std::unique_ptr<SCEVUnionPredicate> Predicate; bool hasAlwaysTruePredicate() const { return !Predicate || Predicate->isAlwaysTrue(); } - explicit ExitNotTakenInfo(AssertingVH<BasicBlock> ExitingBlock, + explicit ExitNotTakenInfo(PoisoningVH<BasicBlock> ExitingBlock, const SCEV *ExactNotTaken, std::unique_ptr<SCEVUnionPredicate> Predicate) : ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken), @@ -972,6 +978,20 @@ private: /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. Here LHS is an operation that includes FoundLHS as one of its + /// arguments. + bool isImpliedViaOperations(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, const SCEV *FoundRHS, + unsigned Depth = 0); + + /// Test whether the condition described by Pred, LHS, and RHS is true. + /// Use only simple non-recursive types of checks, such as range analysis etc. + bool isKnownViaSimpleReasoning(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. 
bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, @@ -1065,18 +1085,6 @@ private: bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, bool &Increasing); - /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X" - /// is monotonically increasing or decreasing. In the former case set - /// `Increasing` to true and in the latter case set `Increasing` to false. - /// - /// A predicate is said to be monotonically increasing if may go from being - /// false to being true as the loop iterates, but never the other way - /// around. A predicate is said to be monotonically decreasing if may go - /// from being true to being false as the loop iterates, but never the other - /// way around. - bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, - bool &Increasing); - /// Return SCEV no-wrap flags that can be proven based on reasoning about /// how poison produced from no-wrap flags on this value (e.g. a nuw add) /// would trigger undefined behavior on overflow. @@ -1129,6 +1137,9 @@ public: /// return true. For pointer types, this is the pointer-sized integer type. Type *getEffectiveSCEVType(Type *Ty) const; + // Returns a wider type among {Ty1, Ty2}. + Type *getWiderType(Type *Ty1, Type *Ty2) const; + /// Return true if the SCEV is a scAddRecExpr or it contains /// scAddRecExpr. The result will be cached in HasRecMap. /// @@ -1152,7 +1163,8 @@ public: const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty); const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; @@ -1301,7 +1313,7 @@ public: /// /// Implemented in terms of the \c getSmallConstantTripCount overload with /// the single exiting block passed to it. See that routine for details. - unsigned getSmallConstantTripCount(Loop *L); + unsigned getSmallConstantTripCount(const Loop *L); /// Returns the maximum trip count of this loop as a normal unsigned /// value. Returns 0 if the trip count is unknown or not constant. This @@ -1310,12 +1322,12 @@ public: /// before taking the branch. For loops with multiple exits, it may not be /// the number times that the loop header executes if the loop exits /// prematurely via another branch. - unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock); + unsigned getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock); /// Returns the upper bound of the loop trip count as a normal unsigned /// value. /// Returns 0 if the trip count is unknown or not constant. - unsigned getSmallConstantMaxTripCount(Loop *L); + unsigned getSmallConstantMaxTripCount(const Loop *L); /// Returns the largest constant divisor of the trip count of the /// loop if it is a single-exit loop and we can compute a small maximum for @@ -1323,7 +1335,7 @@ public: /// /// Implemented in terms of the \c getSmallConstantTripMultiple overload with /// the single exiting block passed to it. See that routine for details. - unsigned getSmallConstantTripMultiple(Loop *L); + unsigned getSmallConstantTripMultiple(const Loop *L); /// Returns the largest constant divisor of the trip count of this loop as a /// normal unsigned value, if possible. 
This means that the actual trip @@ -1331,12 +1343,13 @@ public: /// count could very well be zero as well!). As explained in the comments /// for getSmallConstantTripCount, this assumes that control exits the loop /// via ExitingBlock. - unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock); + unsigned getSmallConstantTripMultiple(const Loop *L, + BasicBlock *ExitingBlock); /// Get the expression for the number of loop iterations for which this loop /// is guaranteed not to exit via ExitingBlock. Otherwise return /// SCEVCouldNotCompute. - const SCEV *getExitCount(Loop *L, BasicBlock *ExitingBlock); + const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock); /// If the specified loop has a predictable backedge-taken count, return it, /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count @@ -1432,6 +1445,18 @@ public: bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); + /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X" + /// is monotonically increasing or decreasing. In the former case set + /// `Increasing` to true and in the latter case set `Increasing` to false. + /// + /// A predicate is said to be monotonically increasing if it may go from being + /// false to being true as the loop iterates, but never the other way + /// around. A predicate is said to be monotonically decreasing if it may go + /// from being true to being false as the loop iterates, but never the other + /// way around. + bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, + bool &Increasing); + /// Return true if the result of the predicate LHS `Pred` RHS is loop /// invariant with respect to L. Set InvariantPred, InvariantLHS and /// InvariantRHS so that InvariantLHS `InvariantPred` InvariantRHS is the @@ -1613,6 +1638,10 @@ private: bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, bool NoWrap); + /// Get add expr already created or create a new one + const SCEV *getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags); + private: FoldingSet<SCEV> UniqueSCEVs; FoldingSet<SCEVPredicate> UniquePreds; diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index fdcd8be00dde..2c693bceb24d 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -595,58 +595,82 @@ namespace llvm { const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); - return SE.getTruncateExpr(Operand, Expr->getType()); + return Operand == Expr->getOperand() + ? Expr + : SE.getTruncateExpr(Operand, Expr->getType()); } const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); - return SE.getZeroExtendExpr(Operand, Expr->getType()); + return Operand == Expr->getOperand() + ? Expr + : SE.getZeroExtendExpr(Operand, Expr->getType()); } const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); - return SE.getSignExtendExpr(Operand, Expr->getType()); + return Operand == Expr->getOperand() + ?
Expr + : SE.getSignExtendExpr(Operand, Expr->getType()); } const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getAddExpr(Operands); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getAddExpr(Operands); } const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getMulExpr(Operands); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getMulExpr(Operands); } const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - return SE.getUDivExpr(((SC*)this)->visit(Expr->getLHS()), - ((SC*)this)->visit(Expr->getRHS())); + auto *LHS = ((SC *)this)->visit(Expr->getLHS()); + auto *RHS = ((SC *)this)->visit(Expr->getRHS()); + bool Changed = LHS != Expr->getLHS() || RHS != Expr->getRHS(); + return !Changed ? Expr : SE.getUDivExpr(LHS, RHS); } const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getAddRecExpr(Operands, Expr->getLoop(), - Expr->getNoWrapFlags()); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr + : SE.getAddRecExpr(Operands, Expr->getLoop(), + Expr->getNoWrapFlags()); } const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getSMaxExpr(Operands); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC *)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getSMaxExpr(Operands); } const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getUMaxExpr(Operands); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getUMaxExpr(Operands); } const SCEV *visitUnknown(const SCEVUnknown *Expr) { diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h index 7c6423a21cfa..b73ad95278a0 100644 --- a/include/llvm/Analysis/ScalarEvolutionNormalization.h +++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h @@ -37,42 +37,33 @@ #define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" namespace llvm { -class Instruction; -class DominatorTree; class Loop; class ScalarEvolution; class SCEV; -class Value; -/// TransformKind - Different types of transformations that -/// TransformForPostIncUse can do. 
-enum TransformKind { - /// Normalize - Normalize according to the given loops. - Normalize, - /// NormalizeAutodetect - Detect post-inc opportunities on new expressions, - /// update the given loop set, and normalize. - NormalizeAutodetect, - /// Denormalize - Perform the inverse transform on the expression with the - /// given loop set. - Denormalize -}; - -/// PostIncLoopSet - A set of loops. typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet; -/// TransformForPostIncUse - Transform the given expression according to the -/// given transformation kind. -const SCEV *TransformForPostIncUse(TransformKind Kind, - const SCEV *S, - Instruction *User, - Value *OperandValToReplace, - PostIncLoopSet &Loops, - ScalarEvolution &SE, - DominatorTree &DT); +typedef function_ref<bool(const SCEVAddRecExpr *)> NormalizePredTy; + +/// Normalize \p S to be post-increment for all loops present in \p +/// Loops. +const SCEV *normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, + ScalarEvolution &SE); + +/// Normalize \p S for all add recurrence sub-expressions for which \p +/// Pred returns true. +const SCEV *normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred, + ScalarEvolution &SE); -} +/// Denormalize \p S to be post-increment for all loops present in \p +/// Loops. +const SCEV *denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, + ScalarEvolution &SE); +} // namespace llvm #endif diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index 5d5e5b127e63..637fc7ed30dd 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -20,7 +20,7 @@ // One of TLI_DEFINE_ENUM/STRING are defined. #if defined(TLI_DEFINE_ENUM) -#define TLI_DEFINE_ENUM_INTERNAL(enum_variant) enum_variant, +#define TLI_DEFINE_ENUM_INTERNAL(enum_variant) LibFunc_##enum_variant, #define TLI_DEFINE_STRING_INTERNAL(string_repr) #else #define TLI_DEFINE_ENUM_INTERNAL(enum_variant) diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index 8675882431d5..944250cfd6ac 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -30,14 +30,12 @@ struct VecDesc { unsigned VectorizationFactor; }; - namespace LibFunc { - enum Func { + enum LibFunc { #define TLI_DEFINE_ENUM #include "llvm/Analysis/TargetLibraryInfo.def" - NumLibFuncs - }; - } + NumLibFuncs + }; /// Implementation of the target library information. 
/// @@ -48,9 +46,9 @@ struct VecDesc { class TargetLibraryInfoImpl { friend class TargetLibraryInfo; - unsigned char AvailableArray[(LibFunc::NumLibFuncs+3)/4]; + unsigned char AvailableArray[(NumLibFuncs+3)/4]; llvm::DenseMap<unsigned, std::string> CustomNames; - static StringRef const StandardNames[LibFunc::NumLibFuncs]; + static StringRef const StandardNames[NumLibFuncs]; bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param; enum AvailabilityState { @@ -58,11 +56,11 @@ class TargetLibraryInfoImpl { CustomName = 1, Unavailable = 0 // (memset to all zeros) }; - void setState(LibFunc::Func F, AvailabilityState State) { + void setState(LibFunc F, AvailabilityState State) { AvailableArray[F/4] &= ~(3 << 2*(F&3)); AvailableArray[F/4] |= State << 2*(F&3); } - AvailabilityState getState(LibFunc::Func F) const { + AvailabilityState getState(LibFunc F) const { return static_cast<AvailabilityState>((AvailableArray[F/4] >> 2*(F&3)) & 3); } @@ -74,7 +72,7 @@ class TargetLibraryInfoImpl { /// Return true if the function type FTy is valid for the library function /// F, regardless of whether the function is available. - bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc::Func F, + bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F, const DataLayout *DL) const; public: @@ -104,28 +102,28 @@ public: /// /// If it is one of the known library functions, return true and set F to the /// corresponding value. - bool getLibFunc(StringRef funcName, LibFunc::Func &F) const; + bool getLibFunc(StringRef funcName, LibFunc &F) const; /// Searches for a particular function name, also checking that its type is /// valid for the library function matching that name. /// /// If it is one of the known library functions, return true and set F to the /// corresponding value. - bool getLibFunc(const Function &FDecl, LibFunc::Func &F) const; + bool getLibFunc(const Function &FDecl, LibFunc &F) const; /// Forces a function to be marked as unavailable. - void setUnavailable(LibFunc::Func F) { + void setUnavailable(LibFunc F) { setState(F, Unavailable); } /// Forces a function to be marked as available. - void setAvailable(LibFunc::Func F) { + void setAvailable(LibFunc F) { setState(F, StandardName); } /// Forces a function to be marked as available and provide an alternate name /// that must be used. - void setAvailableWithName(LibFunc::Func F, StringRef Name) { + void setAvailableWithName(LibFunc F, StringRef Name) { if (StandardNames[F] != Name) { setState(F, CustomName); CustomNames[F] = Name; @@ -225,16 +223,16 @@ public: /// /// If it is one of the known library functions, return true and set F to the /// corresponding value. - bool getLibFunc(StringRef funcName, LibFunc::Func &F) const { + bool getLibFunc(StringRef funcName, LibFunc &F) const { return Impl->getLibFunc(funcName, F); } - bool getLibFunc(const Function &FDecl, LibFunc::Func &F) const { + bool getLibFunc(const Function &FDecl, LibFunc &F) const { return Impl->getLibFunc(FDecl, F); } /// Tests whether a library function is available. - bool has(LibFunc::Func F) const { + bool has(LibFunc F) const { return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable; } bool isFunctionVectorizable(StringRef F, unsigned VF) const { @@ -249,37 +247,37 @@ public: /// Tests if the function is both available and a candidate for optimized code /// generation. 
- bool hasOptimizedCodeGen(LibFunc::Func F) const { + bool hasOptimizedCodeGen(LibFunc F) const { if (Impl->getState(F) == TargetLibraryInfoImpl::Unavailable) return false; switch (F) { default: break; - case LibFunc::copysign: case LibFunc::copysignf: case LibFunc::copysignl: - case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: - case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl: - case LibFunc::cos: case LibFunc::cosf: case LibFunc::cosl: - case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: - case LibFunc::sqrt_finite: case LibFunc::sqrtf_finite: - case LibFunc::sqrtl_finite: - case LibFunc::fmax: case LibFunc::fmaxf: case LibFunc::fmaxl: - case LibFunc::fmin: case LibFunc::fminf: case LibFunc::fminl: - case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl: - case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl: - case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill: - case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl: - case LibFunc::round: case LibFunc::roundf: case LibFunc::roundl: - case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: - case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l: - case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: - case LibFunc::memcmp: case LibFunc::strcmp: case LibFunc::strcpy: - case LibFunc::stpcpy: case LibFunc::strlen: case LibFunc::strnlen: - case LibFunc::memchr: case LibFunc::mempcpy: + case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: + case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: + case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl: + case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: + case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: + case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite: + case LibFunc_sqrtl_finite: + case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl: + case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: + case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: + case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl: + case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill: + case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl: + case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: + case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: + case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l: + case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l: + case LibFunc_memcmp: case LibFunc_strcmp: case LibFunc_strcpy: + case LibFunc_stpcpy: case LibFunc_strlen: case LibFunc_strnlen: + case LibFunc_memchr: case LibFunc_mempcpy: return true; } return false; } - StringRef getName(LibFunc::Func F) const { + StringRef getName(LibFunc F) const { auto State = Impl->getState(F); if (State == TargetLibraryInfoImpl::Unavailable) return StringRef(); diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 209f05c279d0..67196687d556 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -44,23 +44,26 @@ class Value; /// \brief Information about a load/store intrinsic defined by the target. struct MemIntrinsicInfo { - MemIntrinsicInfo() - : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0), - NumMemRefs(0), PtrVal(nullptr) {} - bool ReadMem; - bool WriteMem; - /// True only if this memory operation is non-volatile, non-atomic, and - /// unordered. 
(See LoadInst/StoreInst for details on each) - bool IsSimple; - // Same Id is set by the target for corresponding load/store intrinsics. - unsigned short MatchingId; - int NumMemRefs; - /// This is the pointer that the intrinsic is loading from or storing to. /// If this is non-null, then analysis/optimization passes can assume that /// this intrinsic is functionally equivalent to a load/store from this /// pointer. - Value *PtrVal; + Value *PtrVal = nullptr; + + // Ordering for atomic operations. + AtomicOrdering Ordering = AtomicOrdering::NotAtomic; + + // Same Id is set by the target for corresponding load/store intrinsics. + unsigned short MatchingId = 0; + + bool ReadMem = false; + bool WriteMem = false; + bool IsVolatile = false; + + bool isUnordered() const { + return (Ordering == AtomicOrdering::NotAtomic || + Ordering == AtomicOrdering::Unordered) && !IsVolatile; + } }; /// \brief This pass provides access to the codegen interfaces that are needed @@ -226,6 +229,24 @@ public: /// starting with the sources of divergence. bool isSourceOfDivergence(const Value *V) const; + /// Returns the address space ID for a target's 'flat' address space. Note + /// this is not necessarily the same as addrspace(0), which LLVM sometimes + /// refers to as the generic address space. The flat address space is a + /// generic address space that can be used to access multiple segments of memory + /// with different address spaces. Access of a memory location through a + /// pointer with this address space is expected to be legal but slower + /// compared to the same memory location accessed through a pointer with a + /// different address space. + // + /// This is for targets with different pointer representations which can + /// be converted with the addrspacecast instruction. If a pointer is converted + /// to this address space, optimizations should attempt to replace the access + /// with the source address space. + /// + /// \returns ~0u if the target does not have such a flat address space to + /// optimize away. + unsigned getFlatAddressSpace() const; + /// \brief Test whether calls to a function lower to actual program function /// calls. /// @@ -411,6 +432,16 @@ public: /// containing this constant value for the target. bool shouldBuildLookupTablesForConstant(Constant *C) const; + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) const; + + /// If target has efficient vector element load/store instructions, it can + /// return true here so that insertion/extraction costs are not added to + /// the scalarization cost of a load/store. + bool supportsEfficientVectorElementLoadStore() const; + /// \brief Don't restrict interleaved unrolling to small loops. bool enableAggressiveInterleaving(bool LoopHasReductions) const; @@ -500,6 +531,12 @@ public: /// \return The width of the largest scalar or vector register type. unsigned getRegisterBitWidth(bool Vector) const; + /// \return True if it should be considered for address type promotion. + /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is + /// profitable without finding other extensions fed by the same input. + bool shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const; + /// \return The size of a cache line in bytes.
unsigned getCacheLineSize() const; @@ -540,8 +577,10 @@ public: Type *SubTp = nullptr) const; /// \return The expected cost of cast instructions, such as bitcast, trunc, - /// zext, etc. - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; + /// zext, etc. If there is an existing instruction that holds Opcode, it + /// may be passed in the 'I' parameter. + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr) const; /// \return The expected cost of a sign- or zero-extended vector extract. Use /// -1 to indicate that there is no information about the index value. @@ -552,9 +591,11 @@ public: /// Phi, Ret, Br. int getCFInstrCost(unsigned Opcode) const; - /// \returns The expected cost of compare and select instructions. + /// \returns The expected cost of compare and select instructions. If there + /// is an existing instruction that holds Opcode, it may be passed in the + /// 'I' parameter. int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = nullptr) const; + Type *CondTy = nullptr, const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. @@ -562,7 +603,7 @@ public: /// \return The cost of Load and Store instructions. int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const; + unsigned AddressSpace, const Instruction *I = nullptr) const; /// \return The cost of masked Load and Store instructions. int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, @@ -605,13 +646,19 @@ public: /// ((v0+v2), (v1+v3), undef, undef) int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const; - /// \returns The cost of Intrinsic instructions. Types analysis only. + /// \returns The cost of Intrinsic instructions. Analyses the real arguments. + /// Three cases are handled: 1. scalar instruction 2. vector instruction + /// 3. scalar instruction which is to be vectorized with VF. int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys, FastMathFlags FMF) const; + ArrayRef<Value *> Args, FastMathFlags FMF, + unsigned VF = 1) const; - /// \returns The cost of Intrinsic instructions. Analyses the real arguments. + /// \returns The cost of Intrinsic instructions. Types analysis only. + /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the + /// arguments and the return value will be computed based on types. int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args, FastMathFlags FMF) const; + ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = UINT_MAX) const; /// \returns The cost of Call instructions. 
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const; @@ -720,6 +767,7 @@ public: virtual int getUserCost(const User *U) = 0; virtual bool hasBranchDivergence() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; + virtual unsigned getFlatAddressSpace() = 0; virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0; virtual bool isLegalAddImmediate(int64_t Imm) = 0; @@ -743,6 +791,11 @@ public: virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; + virtual unsigned + getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0; + virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) = 0; + virtual bool supportsEfficientVectorElementLoadStore() = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; virtual bool enableInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; @@ -763,6 +816,8 @@ public: Type *Ty) = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; virtual unsigned getRegisterBitWidth(bool Vector) = 0; + virtual bool shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; virtual unsigned getCacheLineSize() = 0; virtual unsigned getPrefetchDistance() = 0; virtual unsigned getMinPrefetchStride() = 0; @@ -776,16 +831,17 @@ public: ArrayRef<const Value *> Args) = 0; virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) = 0; - virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0; + virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) = 0; virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) = 0; virtual int getCFInstrCost(unsigned Opcode) = 0; virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) = 0; + Type *CondTy, const Instruction *I) = 0; virtual int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) = 0; + unsigned AddressSpace, const Instruction *I) = 0; virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) = 0; @@ -800,11 +856,10 @@ public: virtual int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) = 0; virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys, - FastMathFlags FMF) = 0; + ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed) = 0; virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args, - FastMathFlags FMF) = 0; + ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0; virtual int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) = 0; virtual unsigned getNumberOfParts(Type *Tp) = 0; @@ -879,6 +934,11 @@ public: bool isSourceOfDivergence(const Value *V) override { return Impl.isSourceOfDivergence(V); } + + unsigned getFlatAddressSpace() override { + return Impl.getFlatAddressSpace(); + } + bool isLoweredToCall(const Function *F) override { return Impl.isLoweredToCall(F); } @@ -933,6 +993,19 @@ public: bool shouldBuildLookupTablesForConstant(Constant *C) override { return Impl.shouldBuildLookupTablesForConstant(C); } + unsigned getScalarizationOverhead(Type *Ty, bool 
Insert, + bool Extract) override { + return Impl.getScalarizationOverhead(Ty, Insert, Extract); + } + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) override { + return Impl.getOperandsScalarizationOverhead(Args, VF); + } + + bool supportsEfficientVectorElementLoadStore() override { + return Impl.supportsEfficientVectorElementLoadStore(); + } + bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); } @@ -976,7 +1049,11 @@ public: unsigned getRegisterBitWidth(bool Vector) override { return Impl.getRegisterBitWidth(Vector); } - + bool shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { + return Impl.shouldConsiderAddressTypePromotion( + I, AllowPromotionWithoutCommonHeader); + } unsigned getCacheLineSize() override { return Impl.getCacheLineSize(); } @@ -1003,8 +1080,9 @@ public: Type *SubTp) override { return Impl.getShuffleCost(Kind, Tp, Index, SubTp); } - int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override { - return Impl.getCastInstrCost(Opcode, Dst, Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) override { + return Impl.getCastInstrCost(Opcode, Dst, Src, I); } int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) override { @@ -1013,15 +1091,16 @@ public: int getCFInstrCost(unsigned Opcode) override { return Impl.getCFInstrCost(Opcode); } - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override { - return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) override { + return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) override { - return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + unsigned AddressSpace, const Instruction *I) override { + return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); } int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) override { @@ -1044,13 +1123,13 @@ public: return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm); } int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, - FastMathFlags FMF) override { - return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF); + FastMathFlags FMF, unsigned ScalarizationCostPassed) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF, + ScalarizationCostPassed); } int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args, - FastMathFlags FMF) override { - return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF); + ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); } int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) override { diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index cafc40723c9d..9ab6b7445ab8 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -171,6 +171,10 @@ public: bool isSourceOfDivergence(const Value *V) { return false; } + unsigned 
getFlatAddressSpace () { + return -1; + } + bool isLoweredToCall(const Function *F) { // FIXME: These should almost certainly not be handled here, and instead // handled with the help of TLI or the target itself. This was largely @@ -251,6 +255,15 @@ public: bool shouldBuildLookupTables() { return true; } bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { + return 0; + } + + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, + unsigned VF) { return 0; } + + bool supportsEfficientVectorElementLoadStore() { return false; } + bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } bool enableInterleavedAccessVectorization() { return false; } @@ -292,6 +305,13 @@ public: unsigned getRegisterBitWidth(bool Vector) { return 32; } + bool + shouldConsiderAddressTypePromotion(const Instruction &I, + bool &AllowPromotionWithoutCommonHeader) { + AllowPromotionWithoutCommonHeader = false; + return false; + } + unsigned getCacheLineSize() { return 0; } unsigned getPrefetchDistance() { return 0; } @@ -316,7 +336,8 @@ public: return 1; } - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { return 1; } + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I) { return 1; } unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) { @@ -325,7 +346,8 @@ public: unsigned getCFInstrCost(unsigned Opcode) { return 1; } - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I) { return 1; } @@ -334,7 +356,7 @@ public: } unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, const Instruction *I) { return 1; } @@ -358,11 +380,12 @@ public: } unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Type *> Tys, FastMathFlags FMF) { + ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed) { return 1; } unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef<Value *> Args, FastMathFlags FMF) { + ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) { return 1; } diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h index c3f688f5d7f1..17906ba4e392 100644 --- a/include/llvm/Analysis/TypeMetadataUtils.h +++ b/include/llvm/Analysis/TypeMetadataUtils.h @@ -32,14 +32,15 @@ struct DevirtCallSite { /// call sites based on the call and return them in DevirtCalls. void findDevirtualizableCallsForTypeTest( SmallVectorImpl<DevirtCallSite> &DevirtCalls, - SmallVectorImpl<CallInst *> &Assumes, CallInst *CI); + SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI); /// Given a call to the intrinsic @llvm.type.checked.load, find all /// devirtualizable call sites based on the call and return them in DevirtCalls. 
void findDevirtualizableCallsForTypeCheckedLoad( SmallVectorImpl<DevirtCallSite> &DevirtCalls, SmallVectorImpl<Instruction *> &LoadedPtrs, - SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, CallInst *CI); + SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, + const CallInst *CI); } #endif diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index aaf6f888e06f..e3c2f3bed227 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -31,6 +31,7 @@ template <typename T> class ArrayRef; class Instruction; class Loop; class LoopInfo; + class OptimizationRemarkEmitter; class MDNode; class StringRef; class TargetLibraryInfo; @@ -52,7 +53,8 @@ template <typename T> class ArrayRef; const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); /// Compute known bits from the range metadata. /// \p KnownZero the set of bits that are known to be zero /// \p KnownOne the set of bits that are known to be one @@ -86,8 +88,10 @@ template <typename T> class ArrayRef; /// Return true if the given value is known to be non-zero when defined. For /// vectors, return true if every element is known to be non-zero when - /// defined. Supports values with integer or pointer type and vectors of - /// integers. + /// defined. For pointers, if the context instruction and dominator tree are + /// specified, perform context-sensitive analysis and return true if the + /// pointer couldn't possibly be null at the specified instruction. + /// Supports values with integer or pointer type and vectors of integers. bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, @@ -167,13 +171,25 @@ template <typename T> class ArrayRef; bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth = 0); - /// Return true if we can prove that the specified FP value is either a NaN or - /// never less than 0.0. - /// If \p IncludeNeg0 is false, -0.0 is considered less than 0.0. + /// Return true if we can prove that the specified FP value is either NaN or + /// never less than -0.0. + /// + /// NaN --> true + /// +0 --> true + /// -0 --> true + /// x > +0 --> true + /// x < -0 --> false + /// bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); - /// \returns true if we can prove that the specified FP value has a 0 sign - /// bit. + /// Return true if we can prove that the specified FP value's sign bit is 0. 
+ /// + /// NaN --> true/false (depending on the NaN's sign bit) + /// +0 --> true + /// -0 --> false + /// x > +0 --> true + /// x < -0 --> false + /// bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); /// If the specified value can be set by repeating the same byte in memory, diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h index eaa068b89c77..6315e8408f05 100644 --- a/include/llvm/Analysis/VectorUtils.h +++ b/include/llvm/Analysis/VectorUtils.h @@ -1,4 +1,4 @@ -//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====// +//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,11 +11,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H -#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H +#ifndef LLVM_ANALYSIS_VECTORUTILS_H +#define LLVM_ANALYSIS_VECTORUTILS_H #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/IRBuilder.h" namespace llvm { @@ -123,6 +124,58 @@ computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks, /// This function always sets a (possibly null) value for each K in Kinds. Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL); +/// \brief Create an interleave shuffle mask. +/// +/// This function creates a shuffle mask for interleaving \p NumVecs vectors of +/// vectorization factor \p VF into a single wide vector. The mask is of the +/// form: +/// +/// <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...> +/// +/// For example, the mask for VF = 4 and NumVecs = 2 is: +/// +/// <0, 4, 1, 5, 2, 6, 3, 7>. +Constant *createInterleaveMask(IRBuilder<> &Builder, unsigned VF, + unsigned NumVecs); + +/// \brief Create a stride shuffle mask. +/// +/// This function creates a shuffle mask whose elements begin at \p Start and +/// are incremented by \p Stride. The mask can be used to deinterleave an +/// interleaved vector into separate vectors of vectorization factor \p VF. The +/// mask is of the form: +/// +/// <Start, Start + Stride, ..., Start + Stride * (VF - 1)> +/// +/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is: +/// +/// <0, 2, 4, 6> +Constant *createStrideMask(IRBuilder<> &Builder, unsigned Start, + unsigned Stride, unsigned VF); + +/// \brief Create a sequential shuffle mask. +/// +/// This function creates a shuffle mask whose elements are sequential and begin +/// at \p Start. The mask contains \p NumInts integers and is padded with \p +/// NumUndefs undef values. The mask is of the form: +/// +/// <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs> +/// +/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is: +/// +/// <0, 1, 2, 3, undef, undef, undef, undef> +Constant *createSequentialMask(IRBuilder<> &Builder, unsigned Start, + unsigned NumInts, unsigned NumUndefs); + +/// \brief Concatenate a list of vectors. +/// +/// This function generates code that concatenates the vectors in \p Vecs into a +/// single large vector. The number of vectors should be greater than one, and +/// their element types should be the same. The number of elements in the +/// vectors should also be the same; however, if the last vector has fewer +/// elements, it will be padded with undefs. +Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs); + } // llvm namespace #endif
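The new VectorUtils mask helpers declared above can be combined with IRBuilder's shufflevector support. The sketch below is illustrative only and is not part of the patch: the function interleaveAndTakeEvenLanes and the fixed vectorization factor of 4 are assumptions made for the example.

// Hypothetical usage sketch of createInterleaveMask / createStrideMask.
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Interleave two <4 x i32> vectors A and B into an 8-wide vector using the
// mask <0, 4, 1, 5, 2, 6, 3, 7>, then pull the even lanes back out with the
// stride-2 mask <0, 2, 4, 6>.
static Value *interleaveAndTakeEvenLanes(IRBuilder<> &Builder, Value *A,
                                         Value *B) {
  const unsigned VF = 4; // assumed vectorization factor of A and B
  Constant *InterleaveMask = createInterleaveMask(Builder, VF, /*NumVecs=*/2);
  Value *Wide =
      Builder.CreateShuffleVector(A, B, InterleaveMask, "interleaved");

  // Deinterleave: start at lane 0, stride 2, VF elements.
  Constant *EvenMask = createStrideMask(Builder, /*Start=*/0, /*Stride=*/2, VF);
  return Builder.CreateShuffleVector(Wide, UndefValue::get(Wide->getType()),
                                     EvenMask, "even.lanes");
}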