| author | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
| commit | 1d5ae1026e831016fc29fd927877c86af904481f (patch) | |
| tree | 2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /include/llvm/Analysis | |
| parent | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff) | |
Diffstat (limited to 'include/llvm/Analysis')
38 files changed, 1732 insertions, 208 deletions
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 948341554f23..282142f51bb3 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -949,7 +949,7 @@ template <typename DerivedT> class AAResultBase { /// A pointer to the AAResults object that this AAResult is /// aggregated within. May be null if not aggregated. - AAResults *AAR; + AAResults *AAR = nullptr; /// Helper to dispatch calls back through the derived type. DerivedT &derived() { return static_cast<DerivedT &>(*this); } diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 34a509b7f4bb..187317e3831b 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -87,10 +87,11 @@ class AliasSet : public ilist_node<AliasSet> { AAInfo = NewAAInfo; else { AAMDNodes Intersection(AAInfo.intersect(NewAAInfo)); - if (!Intersection) { + if (!Intersection.TBAA || !Intersection.Scope || + !Intersection.NoAlias) { // NewAAInfo conflicts with AAInfo. AAInfo = DenseMapInfo<AAMDNodes>::getTombstoneKey(); - return SizeChanged; + SizeChanged = true; } AAInfo = Intersection; } diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index b42846472f2e..0efbd59023d6 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -73,8 +73,8 @@ class AssumptionCache { /// Get the vector of assumptions which affect a value from the cache. SmallVector<WeakTrackingVH, 1> &getOrInsertAffectedValues(Value *V); - /// Copy affected values in the cache for OV to be affected values for NV. - void copyAffectedValuesInCache(Value *OV, Value *NV); + /// Move affected values in the cache for OV to be affected values for NV. + void transferAffectedValuesInCache(Value *OV, Value *NV); /// Flag tracking whether we have scanned the function yet. /// diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h index bb55e76ac86a..68f137ba622c 100644 --- a/include/llvm/Analysis/CFG.h +++ b/include/llvm/Analysis/CFG.h @@ -46,6 +46,8 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ); /// bool isCriticalEdge(const Instruction *TI, unsigned SuccNum, bool AllowIdenticalEdges = false); +bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ, + bool AllowIdenticalEdges = false); /// Determine whether instruction 'To' is reachable from 'From', without passing /// through any blocks in ExclusionSet, returning true if uncertain. diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h index 7c8b42b1d8d2..5f5e52af3d88 100644 --- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h +++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h @@ -41,7 +41,8 @@ class CFLAndersAAResult : public AAResultBase<CFLAndersAAResult> { class FunctionInfo; public: - explicit CFLAndersAAResult(const TargetLibraryInfo &TLI); + explicit CFLAndersAAResult( + std::function<const TargetLibraryInfo &(Function &F)> GetTLI); CFLAndersAAResult(CFLAndersAAResult &&RHS); ~CFLAndersAAResult(); @@ -74,7 +75,7 @@ private: /// Build summary for a given function FunctionInfo buildInfoFrom(const Function &); - const TargetLibraryInfo &TLI; + std::function<const TargetLibraryInfo &(Function &F)> GetTLI; /// Cached mapping of Functions to their StratifiedSets. 
/// If a function's sets are currently being built, it is marked diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h index cc7a47cd9a5f..135321616b7c 100644 --- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h +++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -42,7 +42,8 @@ class CFLSteensAAResult : public AAResultBase<CFLSteensAAResult> { class FunctionInfo; public: - explicit CFLSteensAAResult(const TargetLibraryInfo &TLI); + explicit CFLSteensAAResult( + std::function<const TargetLibraryInfo &(Function &)> GetTLI); CFLSteensAAResult(CFLSteensAAResult &&Arg); ~CFLSteensAAResult(); @@ -90,7 +91,7 @@ public: } private: - const TargetLibraryInfo &TLI; + std::function<const TargetLibraryInfo &(Function &)> GetTLI; /// Cached mapping of Functions to their StratifiedSets. /// If a function's sets are currently being built, it is marked diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index 8af5fb86995a..933f2210dafc 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -88,6 +88,7 @@ #ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H #define LLVM_ANALYSIS_CGSCCPASSMANAGER_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/STLExtras.h" @@ -583,10 +584,12 @@ public: SmallVectorImpl<WeakTrackingVH> &CallHandles) { assert(CallHandles.empty() && "Must start with a clear set of handles."); - SmallVector<CallCount, 4> CallCounts; + SmallDenseMap<Function *, CallCount> CallCounts; + CallCount CountLocal = {0, 0}; for (LazyCallGraph::Node &N : C) { - CallCounts.push_back({0, 0}); - CallCount &Count = CallCounts.back(); + CallCount &Count = + CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) + .first->second; for (Instruction &I : instructions(N.getFunction())) if (auto CS = CallSite(&I)) { if (CS.getCalledFunction()) { @@ -626,8 +629,6 @@ public: // Check that we didn't miss any update scenario. assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - assert((int)CallCounts.size() == C->size() && - "Cannot have changed the size of the SCC!"); // Check whether any of the handles were devirtualized. auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) { @@ -642,7 +643,7 @@ public: if (!F) return false; - LLVM_DEBUG(dbgs() << "Found devirutalized call from " + LLVM_DEBUG(dbgs() << "Found devirtualized call from " << CS.getParent()->getParent()->getName() << " to " << F->getName() << "\n"); @@ -664,12 +665,20 @@ public: // manner of transformations such as DCE and other things, but seems to // work well in practice. if (!Devirt) - for (int i = 0, Size = C->size(); i < Size; ++i) - if (CallCounts[i].Indirect > NewCallCounts[i].Indirect && - CallCounts[i].Direct < NewCallCounts[i].Direct) { - Devirt = true; - break; + // Iterate over the keys in NewCallCounts, if Function also exists in + // CallCounts, make the check below. 
+ for (auto &Pair : NewCallCounts) { + auto &CallCountNew = Pair.second; + auto CountIt = CallCounts.find(Pair.first); + if (CountIt != CallCounts.end()) { + const auto &CallCountOld = CountIt->second; + if (CallCountOld.Indirect > CallCountNew.Indirect && + CallCountOld.Direct < CallCountNew.Direct) { + Devirt = true; + break; + } } + } if (!Devirt) { PA.intersect(std::move(PassPA)); diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h index ca7abd34fea2..29921a51d5be 100644 --- a/include/llvm/Analysis/CaptureTracking.h +++ b/include/llvm/Analysis/CaptureTracking.h @@ -17,6 +17,7 @@ namespace llvm { class Value; class Use; + class DataLayout; class Instruction; class DominatorTree; class OrderedBasicBlock; @@ -83,6 +84,11 @@ namespace llvm { /// use U. Return true to stop the traversal or false to continue looking /// for more capturing instructions. virtual bool captured(const Use *U) = 0; + + /// isDereferenceableOrNull - Overload to allow clients with additional + /// knowledge about pointer dereferenceability to provide it and thereby + /// avoid conservative responses when a pointer is compared to null. + virtual bool isDereferenceableOrNull(Value *O, const DataLayout &DL); }; /// PointerMayBeCaptured - Visit the value and the values derived from it and diff --git a/include/llvm/Analysis/DDG.h b/include/llvm/Analysis/DDG.h new file mode 100644 index 000000000000..0e1eb9d2cda3 --- /dev/null +++ b/include/llvm/Analysis/DDG.h @@ -0,0 +1,430 @@ +//===- llvm/Analysis/DDG.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Data-Dependence Graph (DDG). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DDG_H +#define LLVM_ANALYSIS_DDG_H + +#include "llvm/ADT/DirectedGraph.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/Instructions.h" +#include <unordered_map> + +namespace llvm { +class DDGNode; +class DDGEdge; +using DDGNodeBase = DGNode<DDGNode, DDGEdge>; +using DDGEdgeBase = DGEdge<DDGNode, DDGEdge>; +using DDGBase = DirectedGraph<DDGNode, DDGEdge>; +class LPMUpdater; + +/// Data Dependence Graph Node +/// The graph can represent the following types of nodes: +/// 1. Single instruction node containing just one instruction. +/// 2. Multiple instruction node where two or more instructions from +/// the same basic block are merged into one node. +/// 3. Root node is a special node that connects to all components such that +/// there is always a path from it to any node in the graph. 
+class DDGNode : public DDGNodeBase { +public: + using InstructionListType = SmallVectorImpl<Instruction *>; + + enum class NodeKind { + Unknown, + SingleInstruction, + MultiInstruction, + Root, + }; + + DDGNode() = delete; + DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {} + DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {} + DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {} + virtual ~DDGNode() = 0; + + DDGNode &operator=(const DDGNode &N) { + DGNode::operator=(N); + Kind = N.Kind; + return *this; + } + + DDGNode &operator=(DDGNode &&N) { + DGNode::operator=(std::move(N)); + Kind = N.Kind; + return *this; + } + + /// Getter for the kind of this node. + NodeKind getKind() const { return Kind; } + + /// Collect a list of instructions, in \p IList, for which predicate \p Pred + /// evaluates to true when iterating over instructions of this node. Return + /// true if at least one instruction was collected, and false otherwise. + bool collectInstructions(llvm::function_ref<bool(Instruction *)> const &Pred, + InstructionListType &IList) const; + +protected: + /// Setter for the kind of this node. + void setKind(NodeKind K) { Kind = K; } + +private: + NodeKind Kind; +}; + +/// Subclass of DDGNode representing the root node of the graph. +/// There should only be one such node in a given graph. +class RootDDGNode : public DDGNode { +public: + RootDDGNode() : DDGNode(NodeKind::Root) {} + RootDDGNode(const RootDDGNode &N) = delete; + RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {} + ~RootDDGNode() {} + + /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. + static bool classof(const DDGNode *N) { + return N->getKind() == NodeKind::Root; + } + static bool classof(const RootDDGNode *N) { return true; } +}; + +/// Subclass of DDGNode representing single or multi-instruction nodes. +class SimpleDDGNode : public DDGNode { +public: + SimpleDDGNode() = delete; + SimpleDDGNode(Instruction &I); + SimpleDDGNode(const SimpleDDGNode &N); + SimpleDDGNode(SimpleDDGNode &&N); + ~SimpleDDGNode(); + + SimpleDDGNode &operator=(const SimpleDDGNode &N) { + DDGNode::operator=(N); + InstList = N.InstList; + return *this; + } + + SimpleDDGNode &operator=(SimpleDDGNode &&N) { + DDGNode::operator=(std::move(N)); + InstList = std::move(N.InstList); + return *this; + } + + /// Get the list of instructions in this node. + const InstructionListType &getInstructions() const { + assert(!InstList.empty() && "Instruction List is empty."); + return InstList; + } + InstructionListType &getInstructions() { + return const_cast<InstructionListType &>( + static_cast<const SimpleDDGNode *>(this)->getInstructions()); + } + + /// Get the first/last instruction in the node. + Instruction *getFirstInstruction() const { return getInstructions().front(); } + Instruction *getLastInstruction() const { return getInstructions().back(); } + + /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. + static bool classof(const DDGNode *N) { + return N->getKind() == NodeKind::SingleInstruction || + N->getKind() == NodeKind::MultiInstruction; + } + static bool classof(const SimpleDDGNode *N) { return true; } + +private: + /// Append the list of instructions in \p Input to this node. + void appendInstructions(const InstructionListType &Input) { + setKind((InstList.size() == 0 && Input.size() == 1) + ? 
NodeKind::SingleInstruction + : NodeKind::MultiInstruction); + InstList.insert(InstList.end(), Input.begin(), Input.end()); + } + void appendInstructions(const SimpleDDGNode &Input) { + appendInstructions(Input.getInstructions()); + } + + /// List of instructions associated with a single or multi-instruction node. + SmallVector<Instruction *, 2> InstList; +}; + +/// Data Dependency Graph Edge. +/// An edge in the DDG can represent a def-use relationship or +/// a memory dependence based on the result of DependenceAnalysis. +/// A rooted edge connects the root node to one of the components +/// of the graph. +class DDGEdge : public DDGEdgeBase { +public: + /// The kind of edge in the DDG + enum class EdgeKind { Unknown, RegisterDefUse, MemoryDependence, Rooted }; + + explicit DDGEdge(DDGNode &N) = delete; + DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {} + DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {} + DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {} + DDGEdge &operator=(const DDGEdge &E) { + DDGEdgeBase::operator=(E); + Kind = E.Kind; + return *this; + } + + DDGEdge &operator=(DDGEdge &&E) { + DDGEdgeBase::operator=(std::move(E)); + Kind = E.Kind; + return *this; + } + + /// Get the edge kind + EdgeKind getKind() const { return Kind; }; + + /// Return true if this is a def-use edge, and false otherwise. + bool isDefUse() const { return Kind == EdgeKind::RegisterDefUse; } + + /// Return true if this is a memory dependence edge, and false otherwise. + bool isMemoryDependence() const { return Kind == EdgeKind::MemoryDependence; } + + /// Return true if this is an edge stemming from the root node, and false + /// otherwise. + bool isRooted() const { return Kind == EdgeKind::Rooted; } + +private: + EdgeKind Kind; +}; + +/// Encapsulate some common data and functionality needed for different +/// variations of data dependence graphs. +template <typename NodeType> class DependenceGraphInfo { +public: + using DependenceList = SmallVector<std::unique_ptr<Dependence>, 1>; + + DependenceGraphInfo() = delete; + DependenceGraphInfo(const DependenceGraphInfo &G) = delete; + DependenceGraphInfo(const std::string &N, const DependenceInfo &DepInfo) + : Name(N), DI(DepInfo), Root(nullptr) {} + DependenceGraphInfo(DependenceGraphInfo &&G) + : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {} + virtual ~DependenceGraphInfo() {} + + /// Return the label that is used to name this graph. + const StringRef getName() const { return Name; } + + /// Return the root node of the graph. + NodeType &getRoot() const { + assert(Root && "Root node is not available yet. Graph construction may " + "still be in progress\n"); + return *Root; + } + +protected: + // Name of the graph. + std::string Name; + + // Store a copy of DependenceInfo in the graph, so that individual memory + // dependencies don't need to be stored. Instead when the dependence is + // queried it is recomputed using @DI. + const DependenceInfo DI; + + // A special node in the graph that has an edge to every connected component of + // the graph, to ensure all nodes are reachable in a graph walk. 
+ NodeType *Root = nullptr; +}; + +using DDGInfo = DependenceGraphInfo<DDGNode>; + +/// Data Dependency Graph +class DataDependenceGraph : public DDGBase, public DDGInfo { + friend class DDGBuilder; + +public: + using NodeType = DDGNode; + using EdgeType = DDGEdge; + + DataDependenceGraph() = delete; + DataDependenceGraph(const DataDependenceGraph &G) = delete; + DataDependenceGraph(DataDependenceGraph &&G) + : DDGBase(std::move(G)), DDGInfo(std::move(G)) {} + DataDependenceGraph(Function &F, DependenceInfo &DI); + DataDependenceGraph(const Loop &L, DependenceInfo &DI); + ~DataDependenceGraph(); + +protected: + /// Add node \p N to the graph, if it's not added yet, and keep track of + /// the root node. Return true if node is successfully added. + bool addNode(NodeType &N); + +}; + +/// Concrete implementation of a pure data dependence graph builder. This class +/// provides custom implementation for the pure-virtual functions used in the +/// generic dependence graph build algorithm. +/// +/// For information about time complexity of the build algorithm see the +/// comments near the declaration of AbstractDependenceGraphBuilder. +class DDGBuilder : public AbstractDependenceGraphBuilder<DataDependenceGraph> { +public: + DDGBuilder(DataDependenceGraph &G, DependenceInfo &D, + const BasicBlockListType &BBs) + : AbstractDependenceGraphBuilder(G, D, BBs) {} + DDGNode &createRootNode() final override { + auto *RN = new RootDDGNode(); + assert(RN && "Failed to allocate memory for DDG root node."); + Graph.addNode(*RN); + return *RN; + } + DDGNode &createFineGrainedNode(Instruction &I) final override { + auto *SN = new SimpleDDGNode(I); + assert(SN && "Failed to allocate memory for simple DDG node."); + Graph.addNode(*SN); + return *SN; + } + DDGEdge &createDefUseEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::RegisterDefUse); + assert(E && "Failed to allocate memory for edge"); + Graph.connect(Src, Tgt, *E); + return *E; + } + DDGEdge &createMemoryEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::MemoryDependence); + assert(E && "Failed to allocate memory for edge"); + Graph.connect(Src, Tgt, *E); + return *E; + } + DDGEdge &createRootedEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::Rooted); + assert(E && "Failed to allocate memory for edge"); + assert(isa<RootDDGNode>(Src) && "Expected root node"); + Graph.connect(Src, Tgt, *E); + return *E; + } + +}; + +raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N); +raw_ostream &operator<<(raw_ostream &OS, const DDGNode::NodeKind K); +raw_ostream &operator<<(raw_ostream &OS, const DDGEdge &E); +raw_ostream &operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K); +raw_ostream &operator<<(raw_ostream &OS, const DataDependenceGraph &G); + +//===--------------------------------------------------------------------===// +// DDG Analysis Passes +//===--------------------------------------------------------------------===// + +/// Analysis pass that builds the DDG for a loop. +class DDGAnalysis : public AnalysisInfoMixin<DDGAnalysis> { +public: + using Result = std::unique_ptr<DataDependenceGraph>; + Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR); + +private: + friend AnalysisInfoMixin<DDGAnalysis>; + static AnalysisKey Key; +}; + +/// Textual printer pass for the DDG of a loop. 
+class DDGAnalysisPrinterPass : public PassInfoMixin<DDGAnalysisPrinterPass> {
+public:
+  explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+private:
+  raw_ostream &OS;
+};
+
+//===--------------------------------------------------------------------===//
+// GraphTraits specializations for the DDG
+//===--------------------------------------------------------------------===//
+
+/// non-const versions of the graph trait specializations for DDG
+template <> struct GraphTraits<DDGNode *> {
+  using NodeRef = DDGNode *;
+
+  static DDGNode *DDGGetTargetNode(DGEdge<DDGNode, DDGEdge> *P) {
+    return &P->getTargetNode();
+  }
+
+  // Provide a mapped iterator so that the GraphTrait-based implementations can
+  // find the target nodes without having to explicitly go through the edges.
+  using ChildIteratorType =
+      mapped_iterator<DDGNode::iterator, decltype(&DDGGetTargetNode)>;
+  using ChildEdgeIteratorType = DDGNode::iterator;
+
+  static NodeRef getEntryNode(NodeRef N) { return N; }
+  static ChildIteratorType child_begin(NodeRef N) {
+    return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+  }
+  static ChildIteratorType child_end(NodeRef N) {
+    return ChildIteratorType(N->end(), &DDGGetTargetNode);
+  }
+
+  static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+    return N->begin();
+  }
+  static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<DataDependenceGraph *> : public GraphTraits<DDGNode *> {
+  using nodes_iterator = DataDependenceGraph::iterator;
+  static NodeRef getEntryNode(DataDependenceGraph *DG) {
+    return &DG->getRoot();
+  }
+  static nodes_iterator nodes_begin(DataDependenceGraph *DG) {
+    return DG->begin();
+  }
+  static nodes_iterator nodes_end(DataDependenceGraph *DG) { return DG->end(); }
+};
+
+/// const versions of the graph trait specializations for DDG
+template <> struct GraphTraits<const DDGNode *> {
+  using NodeRef = const DDGNode *;
+
+  static const DDGNode *DDGGetTargetNode(const DGEdge<DDGNode, DDGEdge> *P) {
+    return &P->getTargetNode();
+  }
+
+  // Provide a mapped iterator so that the GraphTrait-based implementations can
+  // find the target nodes without having to explicitly go through the edges.
+  using ChildIteratorType =
+      mapped_iterator<DDGNode::const_iterator, decltype(&DDGGetTargetNode)>;
+  using ChildEdgeIteratorType = DDGNode::const_iterator;
+
+  static NodeRef getEntryNode(NodeRef N) { return N; }
+  static ChildIteratorType child_begin(NodeRef N) {
+    return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+  }
+  static ChildIteratorType child_end(NodeRef N) {
+    return ChildIteratorType(N->end(), &DDGGetTargetNode);
+  }
+
+  static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+    return N->begin();
+  }
+  static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<const DataDependenceGraph *>
+    : public GraphTraits<const DDGNode *> {
+  using nodes_iterator = DataDependenceGraph::const_iterator;
+  static NodeRef getEntryNode(const DataDependenceGraph *DG) {
+    return &DG->getRoot();
+  }
+  static nodes_iterator nodes_begin(const DataDependenceGraph *DG) {
+    return DG->begin();
+  }
+  static nodes_iterator nodes_end(const DataDependenceGraph *DG) {
+    return DG->end();
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DDG_H
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index 0410a3314659..c9e8df5db1c2 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -99,7 +99,7 @@ public:
     errs() << "Writing '" << Filename << "'...";
 
-    raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+    raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
     std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
     std::string Title = GraphName + " for '" + F.getName().str() + "' function";
 
@@ -162,7 +162,7 @@ public:
     errs() << "Writing '" << Filename << "'...";
 
-    raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+    raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
     std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph);
 
     if (!EC)
diff --git a/include/llvm/Analysis/DependenceGraphBuilder.h b/include/llvm/Analysis/DependenceGraphBuilder.h
new file mode 100644
index 000000000000..5f4bdb47043b
--- /dev/null
+++ b/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -0,0 +1,119 @@
+//===- llvm/Analysis/DependenceGraphBuilder.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a builder interface that can be used to populate dependence
+// graphs such as DDG and PDG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+/// This abstract builder class defines a set of high-level steps for creating
+/// DDG-like graphs. The client code is expected to inherit from this class and
+/// define concrete implementation for each of the pure virtual functions used
+/// in the high-level algorithm.
+template <class GraphType> class AbstractDependenceGraphBuilder {
+protected:
+  using BasicBlockListType = SmallVectorImpl<BasicBlock *>;
+
+private:
+  using NodeType = typename GraphType::NodeType;
+  using EdgeType = typename GraphType::EdgeType;
+
+public:
+  using ClassesType = EquivalenceClasses<BasicBlock *>;
+  using NodeListType = SmallVector<NodeType *, 4>;
+
+  AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D,
+                                 const BasicBlockListType &BBs)
+      : Graph(G), DI(D), BBList(BBs) {}
+  virtual ~AbstractDependenceGraphBuilder() {}
+
+  /// The main entry to the graph construction algorithm. It starts by
+  /// creating nodes in increasing order of granularity and then
+  /// adds def-use and memory edges.
+  ///
+  /// The algorithmic complexity of this implementation is O(V^2 * I^2), where
+  /// V is the number of vertices (nodes) and I is the number of instructions
+  /// in each node. The total number of instructions, N, is equal to V * I,
+  /// therefore the worst-case time complexity is O(N^2). The average time
+  /// complexity is O((N^2)/2).
+  void populate() {
+    createFineGrainedNodes();
+    createDefUseEdges();
+    createMemoryDependencyEdges();
+    createAndConnectRootNode();
+  }
+
+  /// Create fine grained nodes. These are typically atomic nodes that
+  /// consist of a single instruction.
+  void createFineGrainedNodes();
+
+  /// Analyze the def-use chains and create edges from the nodes containing
+  /// definitions to the nodes containing the uses.
+  void createDefUseEdges();
+
+  /// Analyze data dependencies that exist between memory loads or stores
+  /// in the graph nodes and create edges between them.
+  void createMemoryDependencyEdges();
+
+  /// Create a root node and add edges such that each node in the graph is
+  /// reachable from the root.
+  void createAndConnectRootNode();
+
+protected:
+  /// Create the root node of the graph.
+  virtual NodeType &createRootNode() = 0;
+
+  /// Create an atomic node in the graph given a single instruction.
+  virtual NodeType &createFineGrainedNode(Instruction &I) = 0;
+
+  /// Create a def-use edge going from \p Src to \p Tgt.
+  virtual EdgeType &createDefUseEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+  /// Create a memory dependence edge going from \p Src to \p Tgt.
+  virtual EdgeType &createMemoryEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+  /// Create a rooted edge going from \p Src to \p Tgt.
+  virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+  /// Deallocate memory of edge \p E.
+  virtual void destroyEdge(EdgeType &E) { delete &E; }
+
+  /// Deallocate memory of node \p N.
+  virtual void destroyNode(NodeType &N) { delete &N; }
+
+  /// Map types to map instructions to nodes used when populating the graph.
+  using InstToNodeMap = DenseMap<Instruction *, NodeType *>;
+
+  /// Reference to the graph that gets built by a concrete implementation of
+  /// this builder.
+  GraphType &Graph;
+
+  /// Dependence information used to create memory dependence edges in the
+  /// graph.
+  DependenceInfo &DI;
+
+  /// The list of basic blocks to consider when building the graph.
+  const BasicBlockListType &BBList;
+
+  /// A mapping from instructions to the corresponding nodes in the graph.
+ InstToNodeMap IMap; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h index 3cfb9d13df94..2fac9c8b4b34 100644 --- a/include/llvm/Analysis/DivergenceAnalysis.h +++ b/include/llvm/Analysis/DivergenceAnalysis.h @@ -73,9 +73,12 @@ public: /// operands bool isAlwaysUniform(const Value &Val) const; - /// \brief Whether \p Val is a divergent value + /// \brief Whether \p Val is divergent at its definition. bool isDivergent(const Value &Val) const; + /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use &U) const; + void print(raw_ostream &OS, const Module *) const; private: @@ -189,12 +192,19 @@ public: /// The GPU kernel this analysis result is for const Function &getFunction() const { return DA.getFunction(); } - /// Whether \p V is divergent. + /// Whether \p V is divergent at its definition. bool isDivergent(const Value &V) const; - /// Whether \p V is uniform/non-divergent + /// Whether \p U is divergent. Uses of a uniform value can be divergent. + bool isDivergentUse(const Use &U) const; + + /// Whether \p V is uniform/non-divergent. bool isUniform(const Value &V) const { return !isDivergent(V); } + /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be + /// divergent. + bool isUniformUse(const Use &U) const { return !isDivergentUse(U); } + /// Print all divergent values in the kernel. void print(raw_ostream &OS, const Module *) const; }; diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h index d3fcfc2d41ab..5d1c5a05206a 100644 --- a/include/llvm/Analysis/GlobalsModRef.h +++ b/include/llvm/Analysis/GlobalsModRef.h @@ -34,7 +34,7 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> { class FunctionInfo; const DataLayout &DL; - const TargetLibraryInfo &TLI; + std::function<const TargetLibraryInfo &(Function &F)> GetTLI; /// The globals that do not have their addresses taken. SmallPtrSet<const GlobalValue *, 8> NonAddressTakenGlobals; @@ -72,14 +72,18 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> { /// could perform to the memory utilization here if this becomes a problem. 
std::list<DeletionCallbackHandle> Handles; - explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI); + explicit GlobalsAAResult( + const DataLayout &DL, + std::function<const TargetLibraryInfo &(Function &F)> GetTLI); public: GlobalsAAResult(GlobalsAAResult &&Arg); ~GlobalsAAResult(); - static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI, - CallGraph &CG); + static GlobalsAAResult + analyzeModule(Module &M, + std::function<const TargetLibraryInfo &(Function &F)> GetTLI, + CallGraph &CG); //------------------------------------------------ // Implement the AliasAnalysis API diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index 054ffca7215e..a5ffca13046b 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -31,6 +31,7 @@ #ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H #define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H +#include "llvm/ADT/SetVector.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Operator.h" #include "llvm/IR/User.h" @@ -141,6 +142,13 @@ Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); +/// Given operands for the multiplication of a FMA, fold the result or return +/// null. In contrast to SimplifyFMulInst, this function will not perform +/// simplifications whose unrounded results differ when rounded to the argument +/// type. +Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + /// Given operands for a Mul, fold the result or return null. Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); @@ -234,21 +242,19 @@ Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, /// Given operand for a UnaryOperator, fold the result or return null. Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q); -/// Given operand for an FP UnaryOperator, fold the result or return null. -/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp. -Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, - const SimplifyQuery &Q); +/// Given operand for a UnaryOperator, fold the result or return null. +/// Try to use FastMathFlags when folding the result. +Value *SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF, + const SimplifyQuery &Q); /// Given operands for a BinaryOperator, fold the result or return null. Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q); -/// Given operands for an FP BinaryOperator, fold the result or return null. -/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the -/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. -Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q); +/// Given operands for a BinaryOperator, fold the result or return null. +/// Try to use FastMathFlags when folding the result. +Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); /// Given a callsite, fold the result or return null. Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q); @@ -263,12 +269,14 @@ Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, /// This first performs a normal RAUW of I with SimpleV. 
It then recursively
/// attempts to simplify those users updated by the operation. The 'I'
/// instruction must not be equal to the simplified value 'SimpleV'.
+/// If UnsimplifiedUsers is provided, instructions that could not be simplified
+/// are added to it.
///
/// The function returns true if any simplifications were performed.
-bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
-                                   const TargetLibraryInfo *TLI = nullptr,
-                                   const DominatorTree *DT = nullptr,
-                                   AssumptionCache *AC = nullptr);
+bool replaceAndRecursivelySimplify(
+    Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr,
+    const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr,
+    SmallSetVector<Instruction *, 8> *UnsimplifiedUsers = nullptr);
 
 /// Recursively attempt to simplify an instruction.
 ///
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index 2d83929211e2..20a35bef189b 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -931,7 +931,8 @@ public:
   /// This sets up the graph and computes all of the entry points of the graph.
   /// No function definitions are scanned until their nodes in the graph are
   /// requested during traversal.
-  LazyCallGraph(Module &M, TargetLibraryInfo &TLI);
+  LazyCallGraph(Module &M,
+                function_ref<TargetLibraryInfo &(Function &)> GetTLI);
 
   LazyCallGraph(LazyCallGraph &&G);
   LazyCallGraph &operator=(LazyCallGraph &&RHS);
@@ -1267,7 +1268,12 @@ public:
   /// This just builds the set of entry points to the call graph. The rest is
   /// built lazily as it is walked.
   LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) {
-    return LazyCallGraph(M, AM.getResult<TargetLibraryAnalysis>(M));
+    FunctionAnalysisManager &FAM =
+        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+    auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+      return FAM.getResult<TargetLibraryAnalysis>(F);
+    };
+    return LazyCallGraph(M, GetTLI);
   }
 };
diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
index 0a338b816640..e33b8f4129f3 100644
--- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -39,17 +39,18 @@ public:
   void print(raw_ostream &OS, const Module *) const override;
 
   // Returns true if V is divergent at its definition.
-  //
-  // Even if this function returns false, V may still be divergent when used
-  // in a different basic block.
   bool isDivergent(const Value *V) const;
 
+  // Returns true if U is divergent. Uses of a uniform value can be divergent.
+  bool isDivergentUse(const Use *U) const;
+
   // Returns true if V is uniform/non-divergent.
-  //
-  // Even if this function returns true, V may still be divergent when used
-  // in a different basic block.
   bool isUniform(const Value *V) const { return !isDivergent(V); }
 
+  // Returns true if U is uniform/non-divergent. Uses of a uniform value can be
+  // divergent.
+  bool isUniformUse(const Use *U) const { return !isDivergentUse(U); }
+
   // Keep the analysis results up to date by removing an erased value.
   void removeValue(const Value *V) { DivergentValues.erase(V); }
 
@@ -62,6 +63,9 @@ private:
 
   // Stores all divergent values.
   DenseSet<const Value *> DivergentValues;
+
+  // Stores divergent uses of possibly uniform values.
+  DenseSet<const Use *> DivergentUses;
 };
} // End llvm namespace
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index 5df6bb02308d..9604b2521e89 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -20,7 +20,9 @@
 namespace llvm {
 
 class DataLayout;
+class Loop;
 class MDNode;
+class ScalarEvolution;
 
 /// Return true if this is always a dereferenceable pointer. If the context
 /// instruction is specified perform context-sensitive analysis and return true
@@ -35,7 +37,8 @@ bool isDereferenceablePointer(const Value *V, Type *Ty,
 /// performs context-sensitive analysis and returns true if the pointer is
 /// dereferenceable at the specified instruction.
 bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
-                                        unsigned Align, const DataLayout &DL,
+                                        MaybeAlign Alignment,
+                                        const DataLayout &DL,
                                         const Instruction *CtxI = nullptr,
                                         const DominatorTree *DT = nullptr);
 
@@ -43,7 +46,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
 /// greater or equal than requested. If the context instruction is specified
 /// performs context-sensitive analysis and returns true if the pointer is
 /// dereferenceable at the specified instruction.
-bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
                                         const APInt &Size, const DataLayout &DL,
                                         const Instruction *CtxI = nullptr,
                                         const DominatorTree *DT = nullptr);
 
@@ -56,11 +59,22 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
 /// If it is not obviously safe to load from the specified pointer, we do a
 /// quick local scan of the basic block containing ScanFrom, to determine if
 /// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+bool isSafeToLoadUnconditionally(Value *V, MaybeAlign Alignment, APInt &Size,
                                  const DataLayout &DL,
                                  Instruction *ScanFrom = nullptr,
                                  const DominatorTree *DT = nullptr);
 
+/// Return true if we can prove that the given load (which is assumed to be
+/// within the specified loop) would access only dereferenceable memory, and
+/// be properly aligned on every iteration of the specified loop regardless of
+/// its placement within the loop. (i.e. does not require predication beyond
+/// that required by the header itself and could be hoisted into the header
+/// if desired.) This is more powerful than the variants above when the
+/// address loaded from is analyzable by SCEV.
+bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
+                                       ScalarEvolution &SE,
+                                       DominatorTree &DT);
+
 /// Return true if we know that executing a load from this value cannot trap.
 ///
 /// If DT and ScanFrom are specified this method performs context-sensitive
@@ -69,7 +83,7 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
 /// If it is not obviously safe to load from the specified pointer, we do a
 /// quick local scan of the basic block containing ScanFrom, to determine if
 /// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+bool isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment,
                                  const DataLayout &DL,
                                  Instruction *ScanFrom = nullptr,
                                  const DominatorTree *DT = nullptr);
diff --git a/include/llvm/Analysis/LoopAnalysisManager.h b/include/llvm/Analysis/LoopAnalysisManager.h
index 368a810cfa67..a2e65a7310af 100644
--- a/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/include/llvm/Analysis/LoopAnalysisManager.h
@@ -86,8 +86,9 @@ typedef InnerAnalysisManagerProxy<LoopAnalysisManager, Function>
 template <> class LoopAnalysisManagerFunctionProxy::Result {
 public:
   explicit Result(LoopAnalysisManager &InnerAM, LoopInfo &LI)
-      : InnerAM(&InnerAM), LI(&LI) {}
-  Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI) {
+      : InnerAM(&InnerAM), LI(&LI), MSSAUsed(false) {}
+  Result(Result &&Arg)
+      : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) {
     // We have to null out the analysis manager in the moved-from state
     // because we are taking ownership of the responsibility to clear the
     // analysis state.
@@ -96,6 +97,7 @@ public:
   Result &operator=(Result &&RHS) {
     InnerAM = RHS.InnerAM;
     LI = RHS.LI;
+    MSSAUsed = RHS.MSSAUsed;
     // We have to null out the analysis manager in the moved-from state
     // because we are taking ownership of the responsibility to clear the
    // analysis state.
@@ -112,6 +114,9 @@ public:
       InnerAM->clear();
   }
 
+  /// Mark MemorySSA as used so we can invalidate self if MSSA is invalidated.
+  void markMSSAUsed() { MSSAUsed = true; }
+
   /// Accessor for the analysis manager.
   LoopAnalysisManager &getManager() { return *InnerAM; }
 
@@ -130,6 +135,7 @@ public:
 private:
   LoopAnalysisManager *InnerAM;
   LoopInfo *LI;
+  bool MSSAUsed;
 };
 
 /// Provide a specialized run method for the \c LoopAnalysisManagerFunctionProxy
diff --git a/include/llvm/Analysis/LoopCacheAnalysis.h b/include/llvm/Analysis/LoopCacheAnalysis.h
new file mode 100644
index 000000000000..ffec78b6db2c
--- /dev/null
+++ b/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -0,0 +1,281 @@
+//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the interface for the loop cache analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
+#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class LPMUpdater;
+using CacheCostTy = int64_t;
+using LoopVectorTy = SmallVector<Loop *, 8>;
+
+/// Represents a memory reference as a base pointer and a set of indexing
+/// operations. For example given the array reference A[i][2j+1][3k+2] in a
+/// 3-dim loop nest:
+///   for(i=0;i<n;++i)
+///     for(j=0;j<m;++j)
+///       for(k=0;k<o;++k)
+///         ... A[i][2j+1][3k+2] ...
+/// We expect:
+///   BasePointer -> A
+///   Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
+///   Sizes -> [m][o][4]
+class IndexedReference {
+  friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
+
+public:
+  /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
+  IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
+                   ScalarEvolution &SE);
+
+  bool isValid() const { return IsValid; }
+  const SCEV *getBasePointer() const { return BasePointer; }
+  size_t getNumSubscripts() const { return Subscripts.size(); }
+  const SCEV *getSubscript(unsigned SubNum) const {
+    assert(SubNum < getNumSubscripts() && "Invalid subscript number");
+    return Subscripts[SubNum];
+  }
+  const SCEV *getFirstSubscript() const {
+    assert(!Subscripts.empty() && "Expecting non-empty container");
+    return Subscripts.front();
+  }
+  const SCEV *getLastSubscript() const {
+    assert(!Subscripts.empty() && "Expecting non-empty container");
+    return Subscripts.back();
+  }
+
+  /// Return true/false if the current object and the indexed reference \p
+  /// Other are/aren't in the same cache line of size \p CLS. Two references
+  /// are in the same cache line iff the distance between them in the
+  /// innermost dimension is less than the cache line size. Return None if
+  /// unsure.
+  Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
+                                 AliasAnalysis &AA) const;
+
+  /// Return true if the current object and the indexed reference \p Other
+  /// have distance smaller than \p MaxDistance in the dimension associated
+  /// with the given loop \p L. Return false if the distance is not smaller
+  /// than \p MaxDistance and None if unsure.
+  Optional<bool> hasTemporalReuse(const IndexedReference &Other,
+                                  unsigned MaxDistance, const Loop &L,
+                                  DependenceInfo &DI, AliasAnalysis &AA) const;
+
+  /// Compute the cost of the reference w.r.t. the given loop \p L when it is
+  /// considered in the innermost position in the loop nest.
+  /// The cost is defined as:
+  ///   - equal to one if the reference is loop invariant, or
+  ///   - equal to '(TripCount * stride) / cache_line_size' if:
+  ///      + the reference stride is less than the cache line size, and
+  ///      + the coefficient of this loop's index variable used in all other
+  ///        subscripts is zero
+  ///   - or otherwise equal to 'TripCount'.
+  CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
+
+private:
+  /// Attempt to delinearize the indexed reference.
+  bool delinearize(const LoopInfo &LI);
+
+  /// Return true if the index reference is invariant with respect to loop
+  /// \p L.
+  bool isLoopInvariant(const Loop &L) const;
+
+  /// Return true if the indexed reference is 'consecutive' in loop \p L.
+  /// An indexed reference is 'consecutive' if the only coefficient that uses
+  /// the loop induction variable is the rightmost one, and the access stride
+  /// is smaller than the cache line size \p CLS.
+  bool isConsecutive(const Loop &L, unsigned CLS) const;
+
+  /// Return the coefficient used in the rightmost dimension.
+  const SCEV *getLastCoefficient() const;
+
+  /// Return true if the coefficient corresponding to induction variable of
+  /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
+  bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
+                                     const Loop &L) const;
+
+  /// Verify that the given \p Subscript is 'well formed' (must be a simple add
+  /// recurrence).
+  bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
+
+  /// Return true if the given reference \p Other is definitely aliased with
+  /// the indexed reference represented by this class.
+  bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const;
+
+private:
+  /// True if the reference can be delinearized, false otherwise.
+  bool IsValid = false;
+
+  /// Represent the memory reference instruction.
+  Instruction &StoreOrLoadInst;
+
+  /// The base pointer of the memory reference.
+  const SCEV *BasePointer = nullptr;
+
+  /// The subscript (indexes) of the memory reference.
+  SmallVector<const SCEV *, 3> Subscripts;
+
+  /// The dimensions of the memory reference.
+  SmallVector<const SCEV *, 3> Sizes;
+
+  ScalarEvolution &SE;
+};
+
+/// A reference group represents a set of memory references that exhibit
+/// temporal or spatial reuse. Two references belong to the same
+/// reference group with respect to an inner loop L iff:
+/// 1. they have a loop independent dependency, or
+/// 2. they have a loop carried dependence with a small dependence distance
+///    (e.g. less than 2) carried by the inner loop, or
+/// 3. they refer to the same array, and the subscript in their innermost
+///    dimension is less than or equal to 'd' (where 'd' is less than the cache
+///    line size)
+///
+/// Intuitively a reference group represents memory references that access
+/// the same cache line. Conditions 1,2 above account for temporal reuse, while
+/// condition 3 accounts for spatial reuse.
+using ReferenceGroupTy = SmallVector<std::unique_ptr<IndexedReference>, 8>;
+using ReferenceGroupsTy = SmallVector<ReferenceGroupTy, 8>;
+
+/// \c CacheCost represents the estimated cost of an inner loop as the number
+/// of cache lines used by the memory references it contains.
+/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum
+/// of the cache costs of all of its reference groups when the loop is
+/// considered to be in the innermost position in the nest.
+/// A reference group represents memory references that fall into the same
+/// cache line. Each reference group is analysed with respect to the innermost
+/// loop in a loop nest. The cost of a reference is defined as follows:
+///   - one if it is loop invariant w.r.t the innermost loop,
+///   - equal to the loop trip count divided by the cache line times the
+///     reference stride if the reference stride is less than the cache line
+///     size (CLS), and the coefficient of this loop's index variable used in
+///     all other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
+///   - equal to the innermost loop trip count if the reference stride is
+///     greater or equal to the cache line size CLS.
+class CacheCost {
+  friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
+  using LoopTripCountTy = std::pair<const Loop *, unsigned>;
+  using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
+
+public:
+  static CacheCostTy constexpr InvalidCost = -1;
+
+  /// Construct a CacheCost object for the loop nest described by \p Loops.
+  /// The optional parameter \p TRT can be used to specify the max. distance
+  /// between array elements accessed in a loop so that the elements are
+  /// classified to have temporal reuse.
+  CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
+            TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI,
+            Optional<unsigned> TRT = None);
+
+  /// Create a CacheCost for the loop nest rooted by \p Root.
+ /// The optional parameter \p TRT can be used to specify the max. distance + /// between array elements accessed in a loop so that the elements are + /// classified to have temporal reuse. + static std::unique_ptr<CacheCost> + getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI, + Optional<unsigned> TRT = None); + + /// Return the estimated cost of loop \p L if the given loop is part of the + /// loop nest associated with this object. Return -1 otherwise. + CacheCostTy getLoopCost(const Loop &L) const { + auto IT = std::find_if( + LoopCosts.begin(), LoopCosts.end(), + [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; }); + return (IT != LoopCosts.end()) ? (*IT).second : -1; + } + + /// Return the estimated ordered loop costs. + const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; } + +private: + /// Calculate the cache footprint of each loop in the nest (when it is + /// considered to be in the innermost position). + void calculateCacheFootprint(); + + /// Partition store/load instructions in the loop nest into reference groups. + /// Two or more memory accesses belong in the same reference group if they + /// share the same cache line. + bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const; + + /// Calculate the cost of the given loop \p L assuming it is the innermost + /// loop in nest. + CacheCostTy computeLoopCacheCost(const Loop &L, + const ReferenceGroupsTy &RefGroups) const; + + /// Compute the cost of a representative reference in reference group \p RG + /// when the given loop \p L is considered as the innermost loop in the nest. + /// The computed cost is an estimate for the number of cache lines used by the + /// reference group. The representative reference cost is defined as: + /// - equal to one if the reference is loop invariant, or + /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's + /// induction variable is used only in the reference subscript associated + /// with loop \p L, and (b) the reference stride is less than the cache + /// line size, or + /// - TripCount otherwise + CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG, + const Loop &L) const; + + /// Sort the LoopCosts vector by decreasing cache cost. + void sortLoopCosts() { + sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) { + return A.second > B.second; + }); + } + +private: + /// Loops in the loop nest associated with this object. + LoopVectorTy Loops; + + /// Trip counts for the loops in the loop nest associated with this object. + SmallVector<LoopTripCountTy, 3> TripCounts; + + /// Cache costs for the loops in the loop nest associated with this object. + SmallVector<LoopCacheCostTy, 3> LoopCosts; + + /// The max. distance between array elements accessed in a loop so that the + /// elements are classified to have temporal reuse. + Optional<unsigned> TRT; + + const LoopInfo &LI; + ScalarEvolution &SE; + TargetTransformInfo &TTI; + AliasAnalysis &AA; + DependenceInfo &DI; +}; + +raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R); +raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC); + +/// Printer pass for the \c CacheCost results. 
+class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> { + raw_ostream &OS; + +public: + explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 584eb3a8c854..abf3863b0601 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -30,6 +30,9 @@ // instance. In particular, a Loop might be inside such a non-loop SCC, or a // non-loop SCC might contain a sub-SCC which is a Loop. // +// For an overview of terminology used in this API (and thus all of our loop +// analyses or transforms), see docs/LoopTerminology.rst. +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_LOOPINFO_H @@ -570,9 +573,9 @@ public: bool getIncomingAndBackEdge(BasicBlock *&Incoming, BasicBlock *&Backedge) const; - /// Below are some utilities to get loop bounds and induction variable, and - /// check if a given phinode is an auxiliary induction variable, as well as - /// checking if the loop is canonical. + /// Below are some utilities to get the loop guard, loop bounds and induction + /// variable, and to check if a given phinode is an auxiliary induction + /// variable, if the loop is guarded, and if the loop is canonical. /// /// Here is an example: /// \code @@ -604,6 +607,9 @@ public: /// /// - getInductionVariable --> i_1 /// - isAuxiliaryInductionVariable(x) --> true if x == i_1 + /// - getLoopGuardBranch() + /// --> `if (guardcmp) goto preheader; else goto afterloop` + /// - isGuarded() --> true /// - isCanonical --> false struct LoopBounds { /// Return the LoopBounds object if @@ -725,6 +731,31 @@ public: bool isAuxiliaryInductionVariable(PHINode &AuxIndVar, ScalarEvolution &SE) const; + /// Return the loop guard branch, if it exists. + /// + /// This currently only works on simplified loop, as it requires a preheader + /// and a latch to identify the guard. It will work on loops of the form: + /// \code + /// GuardBB: + /// br cond1, Preheader, ExitSucc <== GuardBranch + /// Preheader: + /// br Header + /// Header: + /// ... + /// br Latch + /// Latch: + /// br cond2, Header, ExitBlock + /// ExitBlock: + /// br ExitSucc + /// ExitSucc: + /// \endcode + BranchInst *getLoopGuardBranch() const; + + /// Return true iff the loop is + /// - in simplify rotated form, and + /// - guarded by a loop guard branch. + bool isGuarded() const { return (getLoopGuardBranch() != nullptr); } + /// Return true if the loop induction variable starts at zero and increments /// by one each time through the loop. bool isCanonical(ScalarEvolution &SE) const; diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index 4c33dac9e21e..8b11e848a195 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -85,9 +85,9 @@ template <class BlockT, class LoopT> bool LoopBase<BlockT, LoopT>::hasDedicatedExits() const { // Each predecessor of each exit block of a normal loop is contained // within the loop. 
- SmallVector<BlockT *, 4> ExitBlocks; - getExitBlocks(ExitBlocks); - for (BlockT *EB : ExitBlocks) + SmallVector<BlockT *, 4> UniqueExitBlocks; + getUniqueExitBlocks(UniqueExitBlocks); + for (BlockT *EB : UniqueExitBlocks) for (BlockT *Predecessor : children<Inverse<BlockT *>>(EB)) if (!contains(Predecessor)) return false; @@ -200,8 +200,6 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const { } } - // Make sure there is only one exit out of the preheader. - assert(Out && "Header of loop has no predecessors from outside loop?"); return Out; } diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 49f9e58ffad7..a89d76b9e5bd 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -58,6 +58,9 @@ class Value; /// like). bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); +bool isAllocationFn(const Value *V, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast = false); /// Tests if a value is a call or invoke to a function that returns a /// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions). @@ -68,6 +71,9 @@ bool isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI, /// allocates uninitialized memory (such as malloc). bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); +bool isMallocLikeFn(const Value *V, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI, + bool LookThroughBitCast = false); /// Tests if a value is a call or invoke to a library function that /// allocates zero-filled memory (such as calloc). @@ -93,6 +99,16 @@ bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, /// reallocates memory (e.g., realloc). bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); +/// Tests if a value is a call or invoke to a library function that +/// allocates memory and throws if an allocation failed (e.g., new). +bool isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + +/// Tests if a value is a call or invoke to a library function that +/// allocates memory (strdup, strndup). +bool isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + //===----------------------------------------------------------------------===// // malloc Call Utility Functions. // @@ -100,9 +116,13 @@ bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI); /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. -const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI); -inline CallInst *extractMallocCall(Value *I, const TargetLibraryInfo *TLI) { - return const_cast<CallInst*>(extractMallocCall((const Value*)I, TLI)); +const CallInst * +extractMallocCall(const Value *I, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI); +inline CallInst * +extractMallocCall(Value *I, + function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { + return const_cast<CallInst *>(extractMallocCall((const Value *)I, GetTLI)); } /// getMallocType - Returns the PointerType resulting from the malloc call. 
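A minimal caller-side sketch of the function_ref-based overloads added above: instead of a single TargetLibraryInfo, the new signatures take a callback that resolves the TLI per function, so library-call classification can respect per-function attributes. The wrapper-pass plumbing (TLIWP) is an assumed context here, not part of the patch:

  #include "llvm/Analysis/MemoryBuiltins.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Value.h"

  using namespace llvm;

  // Classify V as a malloc-like call, resolving TargetLibraryInfo through the
  // callback. TLIWP is assumed to come from the surrounding (legacy) pass.
  static bool isMallocLike(const Value *V,
                           TargetLibraryInfoWrapperPass &TLIWP) {
    auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
      return TLIWP.getTLI(F);
    };
    return isMallocLikeFn(V, GetTLI);
  }

The same pattern applies to isAllocationFn and extractMallocCall; the callback form lets each callee's own function-level TLI decide what counts as a library allocation routine.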
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index e2669c2fa601..e89e5690fad0 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -362,11 +362,14 @@ private: PhiValues &PV; PredIteratorCache PredCache; + unsigned DefaultBlockScanLimit; + public: MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC, - const TargetLibraryInfo &TLI, - DominatorTree &DT, PhiValues &PV) - : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV) {} + const TargetLibraryInfo &TLI, DominatorTree &DT, + PhiValues &PV, unsigned DefaultBlockScanLimit) + : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV), + DefaultBlockScanLimit(DefaultBlockScanLimit) {} /// Handle invalidation in the new PM. bool invalidate(Function &F, const PreservedAnalyses &PA, @@ -511,9 +514,14 @@ class MemoryDependenceAnalysis static AnalysisKey Key; + unsigned DefaultBlockScanLimit; + public: using Result = MemoryDependenceResults; + MemoryDependenceAnalysis(); + MemoryDependenceAnalysis(unsigned DefaultBlockScanLimit) : DefaultBlockScanLimit(DefaultBlockScanLimit) { } + MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM); }; diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h index b7730be75354..e89bf26a7234 100644 --- a/include/llvm/Analysis/MemorySSA.h +++ b/include/llvm/Analysis/MemorySSA.h @@ -793,6 +793,7 @@ protected: friend class MemorySSAPrinterLegacyPass; friend class MemorySSAUpdater; + void verifyPrevDefInPhis(Function &F) const; void verifyDefUses(Function &F) const; void verifyDomination(Function &F) const; void verifyOrdering(Function &F) const; @@ -830,7 +831,8 @@ protected: void insertIntoListsBefore(MemoryAccess *, const BasicBlock *, AccessList::iterator); MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *, - const MemoryUseOrDef *Template = nullptr); + const MemoryUseOrDef *Template = nullptr, + bool CreationMustSucceed = true); private: template <class AliasAnalysisType> class ClobberWalkerBase; diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h index d4d8040c1ff6..1d34663721e3 100644 --- a/include/llvm/Analysis/MemorySSAUpdater.h +++ b/include/llvm/Analysis/MemorySSAUpdater.h @@ -99,7 +99,7 @@ public: /// load a /// Where a mayalias b, *does* require RenameUses be set to true. void insertDef(MemoryDef *Def, bool RenameUses = false); - void insertUse(MemoryUse *Use); + void insertUse(MemoryUse *Use, bool RenameUses = false); /// Update the MemoryPhi in `To` following an edge deletion between `From` and /// `To`. If `To` becomes unreachable, a call to removeBlocks should be made. 
  void removeEdge(BasicBlock *From, BasicBlock *To);
@@ -275,6 +275,7 @@ private:
   getPreviousDefRecursive(BasicBlock *,
                           DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
   MemoryAccess *recursePhi(MemoryAccess *Phi);
+  MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi);
   template <class RangeType>
   MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
   void tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs);
diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h
index 3ef539c89d97..87cf9f85c7f1 100644
--- a/include/llvm/Analysis/MustExecute.h
+++ b/include/llvm/Analysis/MustExecute.h
@@ -7,10 +7,17 @@
 //===----------------------------------------------------------------------===//
 /// \file
 /// Contains a collection of routines for determining if a given instruction is
-/// guaranteed to execute if a given point in control flow is reached.  The most
+/// guaranteed to execute if a given point in control flow is reached. The most
 /// common example is an instruction within a loop being provably executed if we
 /// branch to the header of its containing loop.
 ///
+/// There are two interfaces available to determine if an instruction is
+/// executed once a given point in the control flow is reached:
+/// 1) A loop-centric one derived from LoopSafetyInfo.
+/// 2) A "must be executed context"-based one implemented in the
+///    MustBeExecutedContextExplorer.
+/// Please refer to the class comments for more information.
+///
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
@@ -164,6 +171,280 @@ public:
   virtual ~ICFLoopSafetyInfo() {};
 };

-}
+struct MustBeExecutedContextExplorer;
+
+/// Must be executed iterators visit stretches of instructions that are
+/// guaranteed to be executed together, potentially with other instructions
+/// executed in between.
+///
+/// Given the following code, and assuming all statements are single
+/// instructions which transfer execution to the successor (see
+/// isGuaranteedToTransferExecutionToSuccessor), there are two possible
+/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
+/// and E. If we start at C or D, we will visit all instructions A-E.
+///
+/// \code
+///   A;
+///   B;
+///   if (...) {
+///     C;
+///     D;
+///   }
+///   E;
+/// \endcode
+///
+///
+/// Below is the example extended with instructions F and G. Now we assume F
+/// might not transfer execution to its successor G. As a result we get the
+/// following visit sets:
+///
+/// Start Instruction | Visit Set
+/// A                 | A, B,       E, F
+/// B                 | A, B,       E, F
+/// C                 | A, B, C, D, E, F
+/// D                 | A, B, C, D, E, F
+/// E                 | A, B,       E, F
+/// F                 | A, B,       E, F
+/// G                 | A, B,       E, F, G
+///
+///
+/// \code
+///   A;
+///   B;
+///   if (...) {
+///     C;
+///     D;
+///   }
+///   E;
+///   F;  // Might not transfer execution to its successor G.
+///   G;
+/// \endcode
+///
+///
+/// A more complex example involving conditionals, loops, break, and continue
+/// is shown below. We again assume all instructions will transmit control to
+/// the successor and we assume we can prove the inner loop to be finite. We
+/// omit non-trivial branch conditions as the exploration is oblivious to them.
+/// Constant branches are assumed to be unconditional in the CFG. The resulting
+/// visit sets are shown in the table below.
+///
+/// \code
+///   A;
+///   while (true) {
+///     B;
+///     if (...)
+///       C;
+///     if (...)
+///       continue;
+///     D;
+///     if (...)
+///       break;
+///     do {
+///       if (...)
+///         continue;
+///       E;
+///     } while (...);
+///     F;
+///   }
+///   G;
+/// \endcode
+///
+/// Start Instruction | Visit Set
+/// A                 | A, B
+/// B                 | A, B
+/// C                 | A, B, C
+/// D                 | A, B,    D
+/// E                 | A, B,    D, E, F
+/// F                 | A, B,    D,    F
+/// G                 | A, B,    D,       G
+///
+///
+/// Note that the examples show optimal visit sets but not necessarily the ones
+/// derived by the explorer, depending on the available CFG analyses (see
+/// MustBeExecutedContextExplorer). Also note that, depending on the options,
+/// the visit set can contain instructions from other functions.
+struct MustBeExecutedIterator {
+  /// Type declarations that make this class an input iterator.
+  ///{
+  typedef const Instruction *value_type;
+  typedef std::ptrdiff_t difference_type;
+  typedef const Instruction **pointer;
+  typedef const Instruction *&reference;
+  typedef std::input_iterator_tag iterator_category;
+  ///}
+
+  using ExplorerTy = MustBeExecutedContextExplorer;
+
+  MustBeExecutedIterator(const MustBeExecutedIterator &Other)
+      : Visited(Other.Visited), Explorer(Other.Explorer),
+        CurInst(Other.CurInst) {}
+
+  MustBeExecutedIterator(MustBeExecutedIterator &&Other)
+      : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
+        CurInst(Other.CurInst) {}
+
+  MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
+    if (this != &Other) {
+      std::swap(Visited, Other.Visited);
+      std::swap(CurInst, Other.CurInst);
+    }
+    return *this;
+  }
+
+  ~MustBeExecutedIterator() {}
+
+  /// Pre- and post-increment operators.
+  ///{
+  MustBeExecutedIterator &operator++() {
+    CurInst = advance();
+    return *this;
+  }
+
+  MustBeExecutedIterator operator++(int) {
+    MustBeExecutedIterator tmp(*this);
+    operator++();
+    return tmp;
+  }
+  ///}
+
+  /// Equality and inequality operators. Note that we ignore the history here.
+  ///{
+  bool operator==(const MustBeExecutedIterator &Other) const {
+    return CurInst == Other.CurInst;
+  }
+
+  bool operator!=(const MustBeExecutedIterator &Other) const {
+    return !(*this == Other);
+  }
+  ///}
+
+  /// Return the underlying instruction.
+  const Instruction *&operator*() { return CurInst; }
+  const Instruction *getCurrentInst() const { return CurInst; }
+
+  /// Return true if \p I was encountered by this iterator already.
+  bool count(const Instruction *I) const { return Visited.count(I); }
+
+private:
+  using VisitedSetTy = DenseSet<const Instruction *>;
+
+  /// Private constructors.
+  MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
+
+  /// Reset the iterator to its initial state pointing at \p I.
+  void reset(const Instruction *I);
+
+  /// Try to advance one of the underlying positions (Head or Tail).
+  ///
+  /// \return The next instruction in the must be executed context, or nullptr
+  ///         if none was found.
+  const Instruction *advance();
+
+  /// A set to track the visited instructions in order to deal with endless
+  /// loops and recursion.
+  VisitedSetTy Visited;
+
+  /// A reference to the explorer that created this iterator.
+  ExplorerTy &Explorer;
+
+  /// The instruction we are currently exposing to the user. There is always an
+  /// instruction that we know is executed with the given program point,
+  /// initially the program point itself.
+  const Instruction *CurInst;
+
+  friend struct MustBeExecutedContextExplorer;
+};
+
+/// A "must be executed context" for a given program point PP is the set of
+/// instructions, potentially before and after PP, that are always executed
+/// when PP is reached. The MustBeExecutedContextExplorer offers an interface
+/// to explore "must be executed contexts" in a module through the use of
+/// MustBeExecutedIterator.
+///
+/// The explorer exposes "must be executed iterators" that traverse the must be
+/// executed context. There is little information sharing between iterators as
+/// the expected use case involves few iterators for "far apart" instructions.
+/// If that changes, we should consider caching more intermediate results.
+struct MustBeExecutedContextExplorer {
+
+  /// In the description of the parameters we use PP to denote a program point
+  /// for which the must be executed context is explored, or put differently,
+  /// for which the MustBeExecutedIterator is created.
+  ///
+  /// \param ExploreInterBlock    Flag to indicate if instructions in blocks
+  ///                             other than the parent of PP should be
+  ///                             explored.
+  MustBeExecutedContextExplorer(bool ExploreInterBlock)
+      : ExploreInterBlock(ExploreInterBlock), EndIterator(*this, nullptr) {}
+
+  /// Clean up the dynamically allocated iterators.
+  ~MustBeExecutedContextExplorer() {
+    DeleteContainerSeconds(InstructionIteratorMap);
+  }
+
+  /// Iterator-based interface. \see MustBeExecutedIterator.
+  ///{
+  using iterator = MustBeExecutedIterator;
+  using const_iterator = const MustBeExecutedIterator;
+
+  /// Return an iterator to explore the context around \p PP.
+  iterator &begin(const Instruction *PP) {
+    auto *&It = InstructionIteratorMap[PP];
+    if (!It)
+      It = new iterator(*this, PP);
+    return *It;
+  }
+
+  /// Return an iterator to explore the cached context around \p PP.
+  const_iterator &begin(const Instruction *PP) const {
+    return *InstructionIteratorMap.lookup(PP);
+  }
+
+  /// Return a universal end iterator.
+  ///{
+  iterator &end() { return EndIterator; }
+  iterator &end(const Instruction *) { return EndIterator; }
+
+  const_iterator &end() const { return EndIterator; }
+  const_iterator &end(const Instruction *) const { return EndIterator; }
+  ///}
+
+  /// Return an iterator range to explore the context around \p PP.
+  llvm::iterator_range<iterator> range(const Instruction *PP) {
+    return llvm::make_range(begin(PP), end(PP));
+  }
+
+  /// Return an iterator range to explore the cached context around \p PP.
+  llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
+    return llvm::make_range(begin(PP), end(PP));
+  }
+  ///}
+
+  /// Return the next instruction that is guaranteed to be executed after \p PP.
+  ///
+  /// \param It              The iterator that is used to traverse the must be
+  ///                        executed context.
+  /// \param PP              The program point for which the next instruction
+  ///                        that is guaranteed to execute is determined.
+  const Instruction *
+  getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
+                                   const Instruction *PP);
+
+  /// Parameters that limit the performed exploration. See the constructor for
+  /// their meaning.
+  ///{
+  const bool ExploreInterBlock;
+  ///}
+
+private:
+  /// Map from instructions to associated must be executed iterators.
+  DenseMap<const Instruction *, MustBeExecutedIterator *>
+      InstructionIteratorMap;
+
+  /// A unique end iterator.
+ MustBeExecutedIterator EndIterator; +}; + +} // namespace llvm #endif diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index d9c97dff8c6e..8562519fa7b1 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -103,6 +103,13 @@ namespace llvm { // FunctionPass *createMustExecutePrinter(); + //===--------------------------------------------------------------------===// + // + // createMustBeExecutedContextPrinter - This pass prints information about which + // instructions are guaranteed to execute together (run with -analyze). + // + ModulePass *createMustBeExecutedContextPrinter(); + } #endif diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h index f309d344b8d1..6693e40ccf22 100644 --- a/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/include/llvm/Analysis/ProfileSummaryInfo.h @@ -52,6 +52,15 @@ private: // because the number of profile counts required to reach the hot // percentile is above a huge threshold. Optional<bool> HasHugeWorkingSetSize; + // True if the working set size of the code is considered large, + // because the number of profile counts required to reach the hot + // percentile is above a large threshold. + Optional<bool> HasLargeWorkingSetSize; + // Compute the threshold for a given cutoff. + Optional<uint64_t> computeThreshold(int PercentileCutoff); + // The map that caches the threshold values. The keys are the percentile + // cutoff values and the values are the corresponding threshold values. + DenseMap<int, uint64_t> ThresholdCache; public: ProfileSummaryInfo(Module &M) : M(M) {} @@ -96,6 +105,8 @@ public: bool AllowSynthetic = false); /// Returns true if the working set size of the code is considered huge. bool hasHugeWorkingSetSize(); + /// Returns true if the working set size of the code is considered large. + bool hasLargeWorkingSetSize(); /// Returns true if \p F has hot function entry. bool isFunctionEntryHot(const Function *F); /// Returns true if \p F contains hot code. @@ -104,14 +115,26 @@ public: bool isFunctionEntryCold(const Function *F); /// Returns true if \p F contains only cold code. bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI); + /// Returns true if \p F contains hot code with regard to a given hot + /// percentile cutoff value. + bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff, + const Function *F, + BlockFrequencyInfo &BFI); /// Returns true if count \p C is considered hot. bool isHotCount(uint64_t C); /// Returns true if count \p C is considered cold. bool isColdCount(uint64_t C); + /// Returns true if count \p C is considered hot with regard to a given + /// hot percentile cutoff value. + bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C); /// Returns true if BasicBlock \p BB is considered hot. bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI); /// Returns true if BasicBlock \p BB is considered cold. bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI); + /// Returns true if BasicBlock \p BB is considered hot with regard to a given + /// hot percentile cutoff value. + bool isHotBlockNthPercentile(int PercentileCutoff, + const BasicBlock *BB, BlockFrequencyInfo *BFI); /// Returns true if CallSite \p CS is considered hot. bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); /// Returns true if Callsite \p CS is considered cold. 
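A short sketch of the percentile-based profile queries declared above. The PSI/BFI plumbing is assumed to come from the surrounding pass, and the cutoff constant 990000 (read here as the profile summary's scaled encoding of the 99th percentile) is an illustrative assumption, not something this patch fixes:

  #include "llvm/Analysis/BlockFrequencyInfo.h"
  #include "llvm/Analysis/ProfileSummaryInfo.h"
  #include "llvm/IR/Function.h"

  using namespace llvm;

  // Returns true if F contains code that is hot with regard to the count
  // threshold computed for the given percentile cutoff. The thresholds are
  // cached internally in PSI (see ThresholdCache above).
  static bool hasHotCodeAtCutoff(ProfileSummaryInfo &PSI, const Function &F,
                                 BlockFrequencyInfo &BFI) {
    const int PercentileCutoff = 990000; // Assumed 99th-percentile encoding.
    return PSI.isFunctionHotInCallGraphNthPercentile(PercentileCutoff, &F, BFI);
  }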
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index c59c09dd2095..6b5936680c37 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -365,7 +365,7 @@ typename Tr::RegionNodeT *RegionBase<Tr>::getBBNode(BlockT *BB) const {
     auto Deconst = const_cast<RegionBase<Tr> *>(this);
     typename BBNodeMapT::value_type V = {
         BB,
-        llvm::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
+        std::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
     at = BBNodeMap.insert(std::move(V)).first;
   }
   return at->second.get();
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 0bd98ef37e7a..9c55f7a5090f 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -468,6 +468,8 @@ template <> struct DenseMapInfo<ExitLimitQuery> {
 /// can't do much with the SCEV objects directly, they must ask this class
 /// for services.
 class ScalarEvolution {
+  friend class ScalarEvolutionsTest;
+
 public:
   /// An enum describing the relationship between a SCEV and a loop.
   enum LoopDisposition {
@@ -777,10 +779,10 @@ public:
   /// to (i.e. a "conservative over-approximation") of the value returned by
   /// getBackedgeTakenCount. If such a value cannot be computed, it returns the
   /// SCEVCouldNotCompute object.
-  const SCEV *getMaxBackedgeTakenCount(const Loop *L);
+  const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L);

   /// Return true if the backedge taken count is either the value returned by
-  /// getMaxBackedgeTakenCount or zero.
+  /// getConstantMaxBackedgeTakenCount or zero.
   bool isBackedgeTakenCountMaxOrZero(const Loop *L);

   /// Return true if the specified loop has an analyzable loop-invariant
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index a519f93216b3..b4d727449fbe 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -77,9 +77,13 @@ namespace llvm {
     /// Phis that complete an IV chain. Reuse
     DenseSet<AssertingVH<PHINode>> ChainedPhis;

-    /// When true, expressions are expanded in "canonical" form. In particular,
-    /// addrecs are expanded as arithmetic based on a canonical induction
-    /// variable. When false, expression are expanded in a more literal form.
+    /// When true, SCEVExpander tries to expand expressions in "canonical" form.
+    /// When false, expressions are expanded in a more literal form.
+    ///
+    /// In "canonical" form addrecs are expanded as arithmetic based on a
+    /// canonical induction variable. Note that CanonicalMode doesn't guarantee
+    /// that all expressions are expanded in "canonical" form. For some
+    /// expressions literal mode can be preferred.
     bool CanonicalMode;

     /// When invoked from LSR, the expander is in "strength reduction" mode. The
@@ -275,8 +279,16 @@

     /// Clear the current insertion point. This is useful if the instruction
     /// that had been serving as the insertion point may have been deleted.
-    void clearInsertPoint() {
-      Builder.ClearInsertionPoint();
+    void clearInsertPoint() { Builder.ClearInsertionPoint(); }
+
+    /// Set location information used by debugging information.
+    void SetCurrentDebugLocation(DebugLoc L) {
+      Builder.SetCurrentDebugLocation(std::move(L));
+    }
+
+    /// Get location information used by debugging information.
+    const DebugLoc &getCurrentDebugLocation() const {
+      return Builder.getCurrentDebugLocation();
     }

     /// Return true if the specified instruction was inserted by the code
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h
index 4b5200f5a838..d4b223863c54 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/include/llvm/Analysis/TargetLibraryInfo.h
@@ -30,11 +30,12 @@ struct VecDesc {
   unsigned VectorizationFactor;
 };

-  enum LibFunc {
+  enum LibFunc : unsigned {
 #define TLI_DEFINE_ENUM
 #include "llvm/Analysis/TargetLibraryInfo.def"
-    NumLibFuncs
+    NumLibFuncs,
+    NotLibFunc
   };

 /// Implementation of the target library information.
@@ -48,7 +49,7 @@ class TargetLibraryInfoImpl {
   unsigned char AvailableArray[(NumLibFuncs+3)/4];
   llvm::DenseMap<unsigned, std::string> CustomNames;
-  static StringRef const StandardNames[NumLibFuncs];
+  static StringLiteral const StandardNames[NumLibFuncs];
   bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;

   enum AvailabilityState {
@@ -359,7 +360,6 @@ public:
   TargetLibraryAnalysis(TargetLibraryInfoImpl PresetInfoImpl)
       : PresetInfoImpl(std::move(PresetInfoImpl)) {}

-  TargetLibraryInfo run(Module &M, ModuleAnalysisManager &);
   TargetLibraryInfo run(Function &F, FunctionAnalysisManager &);

 private:
@@ -385,8 +385,13 @@ public:
   explicit TargetLibraryInfoWrapperPass(const Triple &T);
   explicit TargetLibraryInfoWrapperPass(const TargetLibraryInfoImpl &TLI);

-  TargetLibraryInfo &getTLI() { return TLI; }
-  const TargetLibraryInfo &getTLI() const { return TLI; }
+  TargetLibraryInfo &getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) {
+    return TLI;
+  }
+  const TargetLibraryInfo &
+  getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) const {
+    return TLI;
+  }
 };

 } // end namespace llvm
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 7574b811bc1c..d6fa88411654 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -368,6 +368,20 @@ public:
   /// optimize away.
   unsigned getFlatAddressSpace() const;

+  /// Return any intrinsic address operand indexes which may be rewritten if
+  /// they use a flat address space pointer.
+  ///
+  /// \returns true if the intrinsic was handled.
+  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+                                  Intrinsic::ID IID) const;
+
+  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
+  /// NewV, which has a different address space. This should happen for every
+  /// operand index that collectFlatAddressOperands returned for the intrinsic.
+  ///
+  /// \returns true if the intrinsic was handled.
+  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+                                        Value *OldV, Value *NewV) const;
+
   /// Test whether calls to a function lower to actual program function
   /// calls.
   ///
@@ -469,12 +483,17 @@ public:
     bool Force;
     /// Allow using trip count upper bound to unroll loops.
     bool UpperBound;
-    /// Allow peeling off loop iterations for loops with low dynamic tripcount.
+    /// Allow peeling off loop iterations.
    bool AllowPeeling;
     /// Allow unrolling of all the iterations of the runtime loop remainder.
     bool UnrollRemainder;
     /// Allow unroll and jam. Used to enable unroll and jam for the target.
     bool UnrollAndJam;
+    /// Allow peeling based on profile. Used to enable peeling off all
+    /// iterations based on the provided profile.
+    /// If the value is true, the peeling cost model can decide to peel only
+    /// some iterations, in which case it will set this to false.
+    bool PeelProfiledIterations;
     /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
     /// value above is used during unroll and jam for the outer loop size.
     /// This value is used in the same manner to limit the size of the inner
@@ -555,15 +574,15 @@ public:
   /// modes that operate across loop iterations.
   bool shouldFavorBackedgeIndex(const Loop *L) const;

-  /// Return true if the target supports masked load.
-  bool isLegalMaskedStore(Type *DataType) const;
   /// Return true if the target supports masked store.
-  bool isLegalMaskedLoad(Type *DataType) const;
+  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const;
+  /// Return true if the target supports masked load.
+  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const;

   /// Return true if the target supports nontemporal store.
-  bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
+  bool isLegalNTStore(Type *DataType, Align Alignment) const;
   /// Return true if the target supports nontemporal load.
-  bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
+  bool isLegalNTLoad(Type *DataType, Align Alignment) const;

   /// Return true if the target supports masked scatter.
   bool isLegalMaskedScatter(Type *DataType) const;
@@ -622,12 +641,6 @@ public:
   /// Return true if this type is legal.
   bool isTypeLegal(Type *Ty) const;

-  /// Returns the target's jmp_buf alignment in bytes.
-  unsigned getJumpBufAlignment() const;
-
-  /// Returns the target's jmp_buf size in bytes.
-  unsigned getJumpBufSize() const;
-
   /// Return true if switches should be turned into lookup tables for the
   /// target.
   bool shouldBuildLookupTables() const;
@@ -775,10 +788,23 @@ public:
   /// Additional properties of an operand's values.
   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

-  /// \return The number of scalar or vector registers that the target has.
-  /// If 'Vectors' is true, it returns the number of vector registers. If it is
-  /// set to false, it returns the number of scalar registers.
-  unsigned getNumberOfRegisters(bool Vector) const;
+  /// \return the number of registers in the target-provided register class.
+  unsigned getNumberOfRegisters(unsigned ClassID) const;
+
+  /// \return the target-provided register class ID for the provided type,
+  /// accounting for type promotion and other type-legalization techniques
+  /// that the target might apply. However, it specifically does not account
+  /// for the scalarization or splitting of vector types. Should a vector type
+  /// require scalarization or splitting into multiple underlying vector
+  /// registers, that type should be mapped to a register class containing no
+  /// registers. Specifically, this is designed to provide a simple, high-level
+  /// view of the register allocation later performed by the backend. These
+  /// register classes don't necessarily map onto the register classes used by
+  /// the backend.
+  /// FIXME: It's not currently possible to determine how many registers
+  /// are used by the provided type.
+  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+
+  /// \return the target-provided register class name.
+  const char* getRegisterClassName(unsigned ClassID) const;

   /// \return The width of the largest scalar or vector register type.
   unsigned getRegisterBitWidth(bool Vector) const;
@@ -824,18 +850,20 @@ public:
   /// \return The associativity of the cache level, if available.
llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const; - /// \return How much before a load we should place the prefetch instruction. - /// This is currently measured in number of instructions. + /// \return How much before a load we should place the prefetch + /// instruction. This is currently measured in number of + /// instructions. unsigned getPrefetchDistance() const; - /// \return Some HW prefetchers can handle accesses up to a certain constant - /// stride. This is the minimum stride in bytes where it makes sense to start - /// adding SW prefetches. The default is 1, i.e. prefetch with any stride. + /// \return Some HW prefetchers can handle accesses up to a certain + /// constant stride. This is the minimum stride in bytes where it + /// makes sense to start adding SW prefetches. The default is 1, + /// i.e. prefetch with any stride. unsigned getMinPrefetchStride() const; - /// \return The maximum number of iterations to prefetch ahead. If the - /// required number of iterations is more than this number, no prefetching is - /// performed. + /// \return The maximum number of iterations to prefetch ahead. If + /// the required number of iterations is more than this number, no + /// prefetching is performed. unsigned getMaxPrefetchIterationsAhead() const; /// \return The maximum interleave factor that any transform should try to @@ -1155,6 +1183,10 @@ public: virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isAlwaysUniform(const Value *V) = 0; virtual unsigned getFlatAddressSpace() = 0; + virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, + Intrinsic::ID IID) const = 0; + virtual bool rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const = 0; virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) = 0; @@ -1177,10 +1209,10 @@ public: TargetLibraryInfo *LibInfo) = 0; virtual bool shouldFavorPostInc() const = 0; virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; - virtual bool isLegalMaskedStore(Type *DataType) = 0; - virtual bool isLegalMaskedLoad(Type *DataType) = 0; - virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0; - virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0; + virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0; + virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0; + virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; + virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedScatter(Type *DataType) = 0; virtual bool isLegalMaskedGather(Type *DataType) = 0; virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; @@ -1196,8 +1228,6 @@ public: virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool useAA() = 0; virtual bool isTypeLegal(Type *Ty) = 0; - virtual unsigned getJumpBufAlignment() = 0; - virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; virtual bool useColdCCForColdCall(Function &F) = 0; @@ -1228,19 +1258,35 @@ public: Type *Ty) = 0; virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) = 0; - virtual unsigned getNumberOfRegisters(bool Vector) = 0; + virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; + virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0; + 
virtual const char* getRegisterClassName(unsigned ClassID) const = 0; virtual unsigned getRegisterBitWidth(bool Vector) const = 0; virtual unsigned getMinVectorRegisterBitWidth() = 0; virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; - virtual unsigned getCacheLineSize() = 0; - virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0; - virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0; - virtual unsigned getPrefetchDistance() = 0; - virtual unsigned getMinPrefetchStride() = 0; - virtual unsigned getMaxPrefetchIterationsAhead() = 0; + virtual unsigned getCacheLineSize() const = 0; + virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const = 0; + virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0; + + /// \return How much before a load we should place the prefetch + /// instruction. This is currently measured in number of + /// instructions. + virtual unsigned getPrefetchDistance() const = 0; + + /// \return Some HW prefetchers can handle accesses up to a certain + /// constant stride. This is the minimum stride in bytes where it + /// makes sense to start adding SW prefetches. The default is 1, + /// i.e. prefetch with any stride. + virtual unsigned getMinPrefetchStride() const = 0; + + /// \return The maximum number of iterations to prefetch ahead. If + /// the required number of iterations is more than this number, no + /// prefetching is performed. + virtual unsigned getMaxPrefetchIterationsAhead() const = 0; + virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, @@ -1395,6 +1441,16 @@ public: return Impl.getFlatAddressSpace(); } + bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, + Intrinsic::ID IID) const override { + return Impl.collectFlatAddressOperands(OpIndexes, IID); + } + + bool rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const override { + return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); + } + bool isLoweredToCall(const Function *F) override { return Impl.isLoweredToCall(F); } @@ -1440,16 +1496,16 @@ public: bool shouldFavorBackedgeIndex(const Loop *L) const override { return Impl.shouldFavorBackedgeIndex(L); } - bool isLegalMaskedStore(Type *DataType) override { - return Impl.isLegalMaskedStore(DataType); + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override { + return Impl.isLegalMaskedStore(DataType, Alignment); } - bool isLegalMaskedLoad(Type *DataType) override { - return Impl.isLegalMaskedLoad(DataType); + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override { + return Impl.isLegalMaskedLoad(DataType, Alignment); } - bool isLegalNTStore(Type *DataType, unsigned Alignment) override { + bool isLegalNTStore(Type *DataType, Align Alignment) override { return Impl.isLegalNTStore(DataType, Alignment); } - bool isLegalNTLoad(Type *DataType, unsigned Alignment) override { + bool isLegalNTLoad(Type *DataType, Align Alignment) override { return Impl.isLegalNTLoad(DataType, Alignment); } bool isLegalMaskedScatter(Type *DataType) override { @@ -1490,8 +1546,6 @@ public: } bool useAA() override { return Impl.useAA(); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } - unsigned 
getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); } - unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); } bool shouldBuildLookupTables() override { return Impl.shouldBuildLookupTables(); } @@ -1563,8 +1617,14 @@ public: Type *Ty) override { return Impl.getIntImmCost(IID, Idx, Imm, Ty); } - unsigned getNumberOfRegisters(bool Vector) override { - return Impl.getNumberOfRegisters(Vector); + unsigned getNumberOfRegisters(unsigned ClassID) const override { + return Impl.getNumberOfRegisters(ClassID); + } + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override { + return Impl.getRegisterClassForType(Vector, Ty); + } + const char* getRegisterClassName(unsigned ClassID) const override { + return Impl.getRegisterClassName(ClassID); } unsigned getRegisterBitWidth(bool Vector) const override { return Impl.getRegisterBitWidth(Vector); @@ -1583,22 +1643,36 @@ public: return Impl.shouldConsiderAddressTypePromotion( I, AllowPromotionWithoutCommonHeader); } - unsigned getCacheLineSize() override { + unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } - llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override { + llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const override { return Impl.getCacheSize(Level); } - llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override { + llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override { return Impl.getCacheAssociativity(Level); } - unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); } - unsigned getMinPrefetchStride() override { + + /// Return the preferred prefetch distance in terms of instructions. + /// + unsigned getPrefetchDistance() const override { + return Impl.getPrefetchDistance(); + } + + /// Return the minimum stride necessary to trigger software + /// prefetching. + /// + unsigned getMinPrefetchStride() const override { return Impl.getMinPrefetchStride(); } - unsigned getMaxPrefetchIterationsAhead() override { + + /// Return the maximum prefetch distance in terms of loop + /// iterations. 
+ /// + unsigned getMaxPrefetchIterationsAhead() const override { return Impl.getMaxPrefetchIterationsAhead(); } + unsigned getMaxInterleaveFactor(unsigned VF) override { return Impl.getMaxInterleaveFactor(VF); } diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index b99e1eb9adf0..a431fa0d458b 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -156,6 +156,16 @@ public: return -1; } + bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, + Intrinsic::ID IID) const { + return false; + } + + bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, + Value *OldV, Value *NewV) const { + return false; + } + bool isLoweredToCall(const Function *F) { assert(F && "A concrete function must be provided to this routine."); @@ -233,18 +243,18 @@ public: bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } - bool isLegalMaskedStore(Type *DataType) { return false; } + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; } - bool isLegalMaskedLoad(Type *DataType) { return false; } + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; } - bool isLegalNTStore(Type *DataType, unsigned Alignment) { + bool isLegalNTStore(Type *DataType, Align Alignment) { // By default, assume nontemporal memory stores are available for stores // that are aligned and have a size that is a power of 2. unsigned DataSize = DL.getTypeStoreSize(DataType); return Alignment >= DataSize && isPowerOf2_32(DataSize); } - bool isLegalNTLoad(Type *DataType, unsigned Alignment) { + bool isLegalNTLoad(Type *DataType, Align Alignment) { // By default, assume nontemporal memory loads are available for loads that // are aligned and have a size that is a power of 2. unsigned DataSize = DL.getTypeStoreSize(DataType); @@ -284,10 +294,6 @@ public: bool isTypeLegal(Type *Ty) { return false; } - unsigned getJumpBufAlignment() { return 0; } - - unsigned getJumpBufSize() { return 0; } - bool shouldBuildLookupTables() { return true; } bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } @@ -348,7 +354,20 @@ public: return TTI::TCC_Free; } - unsigned getNumberOfRegisters(bool Vector) { return 8; } + unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; } + + unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { + return Vector ? 
1 : 0; + }; + + const char* getRegisterClassName(unsigned ClassID) const { + switch (ClassID) { + default: + return "Generic::Unknown Register Class"; + case 0: return "Generic::ScalarRC"; + case 1: return "Generic::VectorRC"; + } + } unsigned getRegisterBitWidth(bool Vector) const { return 32; } @@ -365,21 +384,20 @@ public: return false; } - unsigned getCacheLineSize() { return 0; } + unsigned getCacheLineSize() const { return 0; } - llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) { + llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: LLVM_FALLTHROUGH; case TargetTransformInfo::CacheLevel::L2D: return llvm::Optional<unsigned>(); } - llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } llvm::Optional<unsigned> getCacheAssociativity( - TargetTransformInfo::CacheLevel Level) { + TargetTransformInfo::CacheLevel Level) const { switch (Level) { case TargetTransformInfo::CacheLevel::L1D: LLVM_FALLTHROUGH; @@ -390,11 +408,9 @@ public: llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); } - unsigned getPrefetchDistance() { return 0; } - - unsigned getMinPrefetchStride() { return 1; } - - unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; } + unsigned getPrefetchDistance() const { return 0; } + unsigned getMinPrefetchStride() const { return 1; } + unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } @@ -830,6 +846,9 @@ public: if (isa<PHINode>(U)) return TTI::TCC_Free; // Model all PHI nodes as free. + if (isa<ExtractValueInst>(U)) + return TTI::TCC_Free; // Model all ExtractValue nodes as free. + // Static alloca doesn't generate target instructions. if (auto *A = dyn_cast<AllocaInst>(U)) if (A->isStaticAlloca()) diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h index 82cf8efeea54..43ce26147c2e 100644 --- a/include/llvm/Analysis/TypeMetadataUtils.h +++ b/include/llvm/Analysis/TypeMetadataUtils.h @@ -50,6 +50,8 @@ void findDevirtualizableCallsForTypeCheckedLoad( SmallVectorImpl<Instruction *> &LoadedPtrs, SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT); + +Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M); } #endif diff --git a/include/llvm/Analysis/Utils/Local.h b/include/llvm/Analysis/Utils/Local.h index acbdf5dca32c..a63bcec9bc41 100644 --- a/include/llvm/Analysis/Utils/Local.h +++ b/include/llvm/Analysis/Utils/Local.h @@ -32,7 +32,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, Value *Result = Constant::getNullValue(IntPtrTy); // If the GEP is inbounds, we know that none of the addressing operations will - // overflow in an unsigned sense. + // overflow in a signed sense. bool isInBounds = GEPOp->isInBounds() && !NoAssumptions; // Build a mask for high order bits. @@ -51,10 +51,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, // Handle a struct index, which adds its field offset to the pointer. 
     if (StructType *STy = GTI.getStructTypeOrNull()) {
-      if (OpC->getType()->isVectorTy())
-        OpC = OpC->getSplatValue();
-
-      uint64_t OpValue = cast<ConstantInt>(OpC)->getZExtValue();
+      uint64_t OpValue = OpC->getUniqueInteger().getZExtValue();
       Size = DL.getStructLayout(STy)->getElementOffset(OpValue);

       if (Size)
@@ -63,20 +60,31 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
       continue;
     }

+    // Splat the constant if needed.
+    if (IntPtrTy->isVectorTy() && !OpC->getType()->isVectorTy())
+      OpC = ConstantVector::getSplat(IntPtrTy->getVectorNumElements(), OpC);
+
     Constant *Scale = ConstantInt::get(IntPtrTy, Size);
     Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
-    Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
+    Scale =
+        ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
     // Emit an add instruction.
     Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
     continue;
   }
+
+  // Splat the index if needed.
+  if (IntPtrTy->isVectorTy() && !Op->getType()->isVectorTy())
+    Op = Builder->CreateVectorSplat(IntPtrTy->getVectorNumElements(), Op);
+
   // Convert to correct type.
   if (Op->getType() != IntPtrTy)
     Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
   if (Size != 1) {
     // We'll let instcombine(mul) convert this to a shl if possible.
     Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
-                            GEP->getName()+".idx", isInBounds /*NUW*/);
+                            GEP->getName() + ".idx", false /*NUW*/,
+                            isInBounds /*NSW*/);
   }
   // Emit an add instruction.
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index fa7e0e0eef7e..33b064fcf9d2 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -242,19 +242,21 @@ class Value;
   /// This is a wrapper around Value::stripAndAccumulateConstantOffsets that
   /// creates and later unpacks the required APInt.
   inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                                 const DataLayout &DL) {
+                                                 const DataLayout &DL,
+                                                 bool AllowNonInbounds = true) {
     APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
     Value *Base =
-        Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt,
-                                               /* AllowNonInbounds */ true);
+        Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds);
+
     Offset = OffsetAPInt.getSExtValue();
     return Base;
   }
-  inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr,
-                                                       int64_t &Offset,
-                                                       const DataLayout &DL) {
-    return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
-                                            DL);
+  inline const Value *
+  GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
+                                   const DataLayout &DL,
+                                   bool AllowNonInbounds = true) {
+    return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
+                                            DL, AllowNonInbounds);
   }

   /// Returns true if the GEP is based on a pointer to a string (array of
@@ -307,20 +309,26 @@ class Value;
   uint64_t GetStringLength(const Value *V, unsigned CharSize = 8);

   /// This function returns the call pointer argument that is considered the same by
-  /// aliasing rules. You CAN'T use it to replace one value with another.
-  const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call);
-  inline Value *getArgumentAliasingToReturnedPointer(CallBase *Call) {
+  /// aliasing rules. You CAN'T use it to replace one value with another. If
+  /// \p MustPreserveNullness is true, the call must preserve the nullness of
+  /// the pointer.
+  const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call,
+                                                    bool MustPreserveNullness);
+  inline Value *
+  getArgumentAliasingToReturnedPointer(CallBase *Call,
+                                       bool MustPreserveNullness) {
     return const_cast<Value *>(getArgumentAliasingToReturnedPointer(
-        const_cast<const CallBase *>(Call)));
+        const_cast<const CallBase *>(Call), MustPreserveNullness));
   }

-  // {launder,strip}.invariant.group returns pointer that aliases its argument,
-  // and it only captures pointer by returning it.
-  // These intrinsics are not marked as nocapture, because returning is
-  // considered as capture. The arguments are not marked as returned neither,
-  // because it would make it useless.
+  /// {launder,strip}.invariant.group returns a pointer that aliases its
+  /// argument, and it only captures the pointer by returning it.
+  /// These intrinsics are not marked as nocapture, because returning is
+  /// considered as capture. The arguments are not marked as returned either,
+  /// because that would make them useless. If \p MustPreserveNullness is true,
+  /// the intrinsic must preserve the nullness of the pointer.
   bool isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
-      const CallBase *Call);
+      const CallBase *Call, bool MustPreserveNullness);

   /// This method strips off any GEP address adjustments and pointer casts from
   /// the specified value, returning the original object being addressed. Note
@@ -376,6 +384,13 @@ class Value;
   /// Return true if the only users of this pointer are lifetime markers.
   bool onlyUsedByLifetimeMarkers(const Value *V);

+  /// Return true if speculation of the given load must be suppressed to avoid
+  /// ordering or interfering with an active sanitizer. If not suppressed,
+  /// dereferenceability and alignment must be proven separately. Note: This
+  /// is only needed for raw reasoning; if you use the interface below
+  /// (isSafeToSpeculativelyExecute), this is handled internally.
+  bool mustSuppressSpeculation(const LoadInst &LI);
+
   /// Return true if the instruction does not have any effects besides
   /// calculating the result and does not have undefined behavior.
   ///
@@ -605,12 +620,12 @@ class Value;
   SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
                                          Instruction::CastOps *CastOp = nullptr,
                                          unsigned Depth = 0);
+
   inline SelectPatternResult
-  matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS,
-                     Instruction::CastOps *CastOp = nullptr) {
-    Value *L = const_cast<Value*>(LHS);
-    Value *R = const_cast<Value*>(RHS);
-    auto Result = matchSelectPattern(const_cast<Value*>(V), L, R);
+  matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS) {
+    Value *L = const_cast<Value *>(LHS);
+    Value *R = const_cast<Value *>(RHS);
+    auto Result = matchSelectPattern(const_cast<Value *>(V), L, R);
     LHS = L;
     RHS = R;
     return Result;
@@ -654,6 +669,12 @@ class Value;
   Optional<bool> isImpliedByDomCondition(const Value *Cond,
                                          const Instruction *ContextI,
                                          const DataLayout &DL);
+
+  /// If Ptr1 is provably equal to Ptr2 plus a constant offset, return that
+  /// offset. For example, Ptr1 might be &A[42], and Ptr2 might be &A[40]. In
+  /// this case offset would be -8.
+  Optional<int64_t> isPointerOffset(const Value *Ptr1, const Value *Ptr2,
+                                    const DataLayout &DL);
 } // end namespace llvm

 #endif // LLVM_ANALYSIS_VALUETRACKING_H
diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h
index d93d2bc4570b..4a61c2bc35c7 100644
--- a/include/llvm/Analysis/VectorUtils.h
+++ b/include/llvm/Analysis/VectorUtils.h
@@ -15,18 +15,129 @@

 #include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/Support/CheckedArithmetic.h"

 namespace llvm {

+/// Describes the type of Parameters
+enum class VFParamKind {
+  Vector,            // No semantic information.
+  OMP_Linear,        // declare simd linear(i)
+  OMP_LinearRef,     // declare simd linear(ref(i))
+  OMP_LinearVal,     // declare simd linear(val(i))
+  OMP_LinearUVal,    // declare simd linear(uval(i))
+  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
+  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
+  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
+  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
+  OMP_Uniform,       // declare simd uniform(i)
+  GlobalPredicate,   // Global logical predicate that acts on all lanes
+                     // of the input and output mask concurrently. For
+                     // example, it is implied by the `M` token in the
+                     // Vector Function ABI mangled name.
+  Unknown
+};
+
+/// Describes the type of Instruction Set Architecture
+enum class VFISAKind {
+  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
+  SVE,          // AArch64 Scalable Vector Extension
+  SSE,          // x86 SSE
+  AVX,          // x86 AVX
+  AVX2,         // x86 AVX2
+  AVX512,       // x86 AVX512
+  Unknown       // Unknown ISA
+};
+
+/// Encapsulates information needed to describe a parameter.
+///
+/// The description of the parameter is not linked directly to
+/// OpenMP or any other vector function description. This structure
+/// is extendible to handle other paradigms that describe vector
+/// functions and their parameters.
+struct VFParameter {
+  unsigned ParamPos;         // Parameter Position in Scalar Function.
+  VFParamKind ParamKind;     // Kind of Parameter.
+  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
+  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.
+
+  // Comparison operator.
+  bool operator==(const VFParameter &Other) const {
+    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
+           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
+                    Other.Alignment);
+  }
+};
+
+/// Contains the information about the kind of vectorization
+/// available.
+///
+/// This object is independent of the paradigm used to
+/// represent vector functions. In particular, it is not attached to
+/// any target-specific ABI.
+struct VFShape {
+  unsigned VF;     // Vectorization factor.
+  bool IsScalable; // True if the function is a scalable function.
+  VFISAKind ISA;   // Instruction Set Architecture.
+  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
+  // Comparison operator.
+  bool operator==(const VFShape &Other) const {
+    return std::tie(VF, IsScalable, ISA, Parameters) ==
+           std::tie(Other.VF, Other.IsScalable, Other.ISA, Other.Parameters);
+  }
+};
+
+/// Holds the VFShape for a specific scalar to vector function mapping.
+struct VFInfo {
+  VFShape Shape;        // Classification of the vector function.
+  StringRef ScalarName; // Scalar Function Name.
+  StringRef VectorName; // Vector Function Name associated to this VFInfo.
+
+  // Comparison operator.
+  bool operator==(const VFInfo &Other) const {
+    return std::tie(Shape, ScalarName, VectorName) ==
+           std::tie(Other.Shape, Other.ScalarName, Other.VectorName);
+  }
+};
+
+namespace VFABI {
+/// Function to construct a VFInfo out of a mangled name in the
+/// following format:
+///
+/// <VFABI_name>{(<redirection>)}
+///
+/// where <VFABI_name> is the name of the vector function, mangled according
+/// to the rules described in the Vector Function ABI of the target vector
+/// extension (or <isa> from now on). The <VFABI_name> is in the following
+/// format:
+///
+/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
+///
+/// This method supports demangling rules for the following <isa>:
+///
+/// * AArch64: https://developer.arm.com/docs/101129/latest
+///
+/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
+///  https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
+///
+/// \param MangledName -> input string in the format
+/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
+Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName);
+
+/// Retrieve the `VFParamKind` from a string token.
+VFParamKind getVFParamKindFromString(const StringRef Token);
+} // end namespace VFABI
+
 template <typename T> class ArrayRef;
 class DemandedBits;
 class GetElementPtrInst;
 template <typename InstTy> class InterleaveGroup;
 class Loop;
 class ScalarEvolution;
+class TargetLibraryInfo;
 class TargetTransformInfo;
 class Type;
 class Value;
@@ -270,13 +381,12 @@ APInt possiblyDemandedEltsInMask(Value *Mask);
 /// the interleaved store group doesn't allow gaps.
 template <typename InstTy> class InterleaveGroup {
 public:
-  InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
-      : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
-
-  InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
-      : Align(Align), InsertPos(Instr) {
-    assert(Align && "The alignment should be non-zero");
+  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
+      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
+        InsertPos(nullptr) {}

+  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
+      : Alignment(Alignment), InsertPos(Instr) {
     Factor = std::abs(Stride);
     assert(Factor > 1 && "Invalid interleave factor");

@@ -286,7 +396,7 @@ public:

   bool isReverse() const { return Reverse; }
   uint32_t getFactor() const { return Factor; }
-  uint32_t getAlignment() const { return Align; }
+  uint32_t getAlignment() const { return Alignment.value(); }
   uint32_t getNumMembers() const { return Members.size(); }

   /// Try to insert a new member \p Instr with index \p Index and
@@ -294,9 +404,7 @@ public:
   /// negative if it is the new leader.
   ///
   /// \returns false if the instruction doesn't belong to the group.
-  bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
-    assert(NewAlign && "The new member's alignment should be non-zero");
-
+  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
     // Make sure the key fits in an int32_t.
     Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
     if (!MaybeKey)
@@ -328,7 +436,7 @@ public:
     }

     // It's always safe to select the minimum alignment.
-    Align = std::min(Align, NewAlign);
+    Alignment = std::min(Alignment, NewAlign);
     Members[Key] = Instr;
     return true;
   }
@@ -387,7 +495,7 @@ public:

 private:
   uint32_t Factor; // Interleave Factor.
bool Reverse; - uint32_t Align; + Align Alignment; DenseMap<int32_t, InstTy *> Members; int32_t SmallestKey = 0; int32_t LargestKey = 0; @@ -504,8 +612,8 @@ private: struct StrideDescriptor { StrideDescriptor() = default; StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, - unsigned Align) - : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {} + Align Alignment) + : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {} // The access's stride. It is negative for a reverse access. int64_t Stride = 0; @@ -517,7 +625,7 @@ private: uint64_t Size = 0; // The alignment of this access. - unsigned Align = 0; + Align Alignment; }; /// A type for holding instructions and their stride descriptors. @@ -528,11 +636,11 @@ private: /// /// \returns the newly created interleave group. InterleaveGroup<Instruction> * - createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) { + createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) { assert(!InterleaveGroupMap.count(Instr) && "Already in an interleaved access group"); InterleaveGroupMap[Instr] = - new InterleaveGroup<Instruction>(Instr, Stride, Align); + new InterleaveGroup<Instruction>(Instr, Stride, Alignment); InterleaveGroups.insert(InterleaveGroupMap[Instr]); return InterleaveGroupMap[Instr]; } |
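As a usage sketch for the VFABI demangler declared earlier: the name below follows the documented _ZGV<isa><mask><vlen><parameters>_<scalarname> scheme ("n" selects AArch64 AdvancedSIMD, "N" means unmasked, vlen 4, one vector parameter). The exact string and the printed fields are illustrative:

  #include "llvm/ADT/Optional.h"
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  // Demangle a Vector Function ABI name and print the recovered mapping.
  void dumpVFInfo() {
    if (Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN4v_sin")) {
      errs() << "scalar: " << Info->ScalarName << "\n"  // "sin"
             << "vector: " << Info->VectorName << "\n"  // the mangled name
             << "VF: " << Info->Shape.VF << "\n";       // 4
    }
  }

The returned VFShape also carries one VFParameter per scalar argument (here a single VFParamKind::Vector entry), which is what a vectorizer would match against the call it wants to widen.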