Diffstat (limited to 'include/llvm/Analysis')
-rw-r--r--  include/llvm/Analysis/AliasAnalysis.h            |   2
-rw-r--r--  include/llvm/Analysis/AliasSetTracker.h          |   5
-rw-r--r--  include/llvm/Analysis/AssumptionCache.h          |   4
-rw-r--r--  include/llvm/Analysis/CFG.h                      |   2
-rw-r--r--  include/llvm/Analysis/CFLAndersAliasAnalysis.h   |   5
-rw-r--r--  include/llvm/Analysis/CFLSteensAliasAnalysis.h   |   5
-rw-r--r--  include/llvm/Analysis/CGSCCPassManager.h         |  31
-rw-r--r--  include/llvm/Analysis/CaptureTracking.h          |   6
-rw-r--r--  include/llvm/Analysis/DDG.h                      | 430
-rw-r--r--  include/llvm/Analysis/DOTGraphTraitsPass.h       |   4
-rw-r--r--  include/llvm/Analysis/DependenceGraphBuilder.h   | 119
-rw-r--r--  include/llvm/Analysis/DivergenceAnalysis.h       |  16
-rw-r--r--  include/llvm/Analysis/GlobalsModRef.h            |  12
-rw-r--r--  include/llvm/Analysis/InstructionSimplify.h      |  36
-rw-r--r--  include/llvm/Analysis/LazyCallGraph.h            |  10
-rw-r--r--  include/llvm/Analysis/LegacyDivergenceAnalysis.h |  16
-rw-r--r--  include/llvm/Analysis/Loads.h                    |  22
-rw-r--r--  include/llvm/Analysis/LoopAnalysisManager.h      |  10
-rw-r--r--  include/llvm/Analysis/LoopCacheAnalysis.h        | 281
-rw-r--r--  include/llvm/Analysis/LoopInfo.h                 |  37
-rw-r--r--  include/llvm/Analysis/LoopInfoImpl.h             |   8
-rw-r--r--  include/llvm/Analysis/MemoryBuiltins.h           |  26
-rw-r--r--  include/llvm/Analysis/MemoryDependenceAnalysis.h |  14
-rw-r--r--  include/llvm/Analysis/MemorySSA.h                |   4
-rw-r--r--  include/llvm/Analysis/MemorySSAUpdater.h         |   3
-rw-r--r--  include/llvm/Analysis/MustExecute.h              | 285
-rw-r--r--  include/llvm/Analysis/Passes.h                   |   7
-rw-r--r--  include/llvm/Analysis/ProfileSummaryInfo.h       |  23
-rw-r--r--  include/llvm/Analysis/RegionInfoImpl.h           |   2
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h          |   6
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h  |  22
-rw-r--r--  include/llvm/Analysis/TargetLibraryInfo.h        |  17
-rw-r--r--  include/llvm/Analysis/TargetTransformInfo.h      | 180
-rw-r--r--  include/llvm/Analysis/TargetTransformInfoImpl.h  |  55
-rw-r--r--  include/llvm/Analysis/TypeMetadataUtils.h        |   2
-rw-r--r--  include/llvm/Analysis/Utils/Local.h              |  22
-rw-r--r--  include/llvm/Analysis/ValueTracking.h            |  67
-rw-r--r--  include/llvm/Analysis/VectorUtils.h              | 144
38 files changed, 1732 insertions(+), 208 deletions(-)
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 948341554f23..282142f51bb3 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -949,7 +949,7 @@ template <typename DerivedT> class AAResultBase {
/// A pointer to the AAResults object that this AAResult is
/// aggregated within. May be null if not aggregated.
- AAResults *AAR;
+ AAResults *AAR = nullptr;
/// Helper to dispatch calls back through the derived type.
DerivedT &derived() { return static_cast<DerivedT &>(*this); }
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 34a509b7f4bb..187317e3831b 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -87,10 +87,11 @@ class AliasSet : public ilist_node<AliasSet> {
AAInfo = NewAAInfo;
else {
AAMDNodes Intersection(AAInfo.intersect(NewAAInfo));
- if (!Intersection) {
+ if (!Intersection.TBAA || !Intersection.Scope ||
+ !Intersection.NoAlias) {
// NewAAInfo conflicts with AAInfo.
AAInfo = DenseMapInfo<AAMDNodes>::getTombstoneKey();
- return SizeChanged;
+ SizeChanged = true;
}
AAInfo = Intersection;
}
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index b42846472f2e..0efbd59023d6 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -73,8 +73,8 @@ class AssumptionCache {
/// Get the vector of assumptions which affect a value from the cache.
SmallVector<WeakTrackingVH, 1> &getOrInsertAffectedValues(Value *V);
- /// Copy affected values in the cache for OV to be affected values for NV.
- void copyAffectedValuesInCache(Value *OV, Value *NV);
+ /// Move affected values in the cache for OV to be affected values for NV.
+ void transferAffectedValuesInCache(Value *OV, Value *NV);
/// Flag tracking whether we have scanned the function yet.
///
diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h
index bb55e76ac86a..68f137ba622c 100644
--- a/include/llvm/Analysis/CFG.h
+++ b/include/llvm/Analysis/CFG.h
@@ -46,6 +46,8 @@ unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
///
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges = false);
+bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ,
+ bool AllowIdenticalEdges = false);
/// Determine whether instruction 'To' is reachable from 'From', without passing
/// through any blocks in ExclusionSet, returning true if uncertain.
diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
index 7c8b42b1d8d2..5f5e52af3d88 100644
--- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h
@@ -41,7 +41,8 @@ class CFLAndersAAResult : public AAResultBase<CFLAndersAAResult> {
class FunctionInfo;
public:
- explicit CFLAndersAAResult(const TargetLibraryInfo &TLI);
+ explicit CFLAndersAAResult(
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
CFLAndersAAResult(CFLAndersAAResult &&RHS);
~CFLAndersAAResult();
@@ -74,7 +75,7 @@ private:
/// Build summary for a given function
FunctionInfo buildInfoFrom(const Function &);
- const TargetLibraryInfo &TLI;
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
/// Cached mapping of Functions to their StratifiedSets.
/// If a function's sets are currently being built, it is marked
diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index cc7a47cd9a5f..135321616b7c 100644
--- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -42,7 +42,8 @@ class CFLSteensAAResult : public AAResultBase<CFLSteensAAResult> {
class FunctionInfo;
public:
- explicit CFLSteensAAResult(const TargetLibraryInfo &TLI);
+ explicit CFLSteensAAResult(
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI);
CFLSteensAAResult(CFLSteensAAResult &&Arg);
~CFLSteensAAResult();
@@ -90,7 +91,7 @@ public:
}
private:
- const TargetLibraryInfo &TLI;
+ std::function<const TargetLibraryInfo &(Function &)> GetTLI;
/// Cached mapping of Functions to their StratifiedSets.
/// If a function's sets are currently being built, it is marked
diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h
index 8af5fb86995a..933f2210dafc 100644
--- a/include/llvm/Analysis/CGSCCPassManager.h
+++ b/include/llvm/Analysis/CGSCCPassManager.h
@@ -88,6 +88,7 @@
#ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H
#define LLVM_ANALYSIS_CGSCCPASSMANAGER_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/STLExtras.h"
@@ -583,10 +584,12 @@ public:
SmallVectorImpl<WeakTrackingVH> &CallHandles) {
assert(CallHandles.empty() && "Must start with a clear set of handles.");
- SmallVector<CallCount, 4> CallCounts;
+ SmallDenseMap<Function *, CallCount> CallCounts;
+ CallCount CountLocal = {0, 0};
for (LazyCallGraph::Node &N : C) {
- CallCounts.push_back({0, 0});
- CallCount &Count = CallCounts.back();
+ CallCount &Count =
+ CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal))
+ .first->second;
for (Instruction &I : instructions(N.getFunction()))
if (auto CS = CallSite(&I)) {
if (CS.getCalledFunction()) {
@@ -626,8 +629,6 @@ public:
// Check that we didn't miss any update scenario.
assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
assert(C->begin() != C->end() && "Cannot have an empty SCC!");
- assert((int)CallCounts.size() == C->size() &&
- "Cannot have changed the size of the SCC!");
// Check whether any of the handles were devirtualized.
auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) {
@@ -642,7 +643,7 @@ public:
if (!F)
return false;
- LLVM_DEBUG(dbgs() << "Found devirutalized call from "
+ LLVM_DEBUG(dbgs() << "Found devirtualized call from "
<< CS.getParent()->getParent()->getName() << " to "
<< F->getName() << "\n");
@@ -664,12 +665,20 @@ public:
// manner of transformations such as DCE and other things, but seems to
// work well in practice.
if (!Devirt)
- for (int i = 0, Size = C->size(); i < Size; ++i)
- if (CallCounts[i].Indirect > NewCallCounts[i].Indirect &&
- CallCounts[i].Direct < NewCallCounts[i].Direct) {
- Devirt = true;
- break;
+ // Iterate over the keys in NewCallCounts; if the function also exists
+ // in CallCounts, do the comparison below.
+ for (auto &Pair : NewCallCounts) {
+ auto &CallCountNew = Pair.second;
+ auto CountIt = CallCounts.find(Pair.first);
+ if (CountIt != CallCounts.end()) {
+ const auto &CallCountOld = CountIt->second;
+ if (CallCountOld.Indirect > CallCountNew.Indirect &&
+ CallCountOld.Direct < CallCountNew.Direct) {
+ Devirt = true;
+ break;
+ }
}
+ }
if (!Devirt) {
PA.intersect(std::move(PassPA));
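A minimal sketch (outside the patch) of the heuristic this hunk moves to: keying the before/after call counts by function rather than by position in the SCC keeps the comparison meaningful when a pass adds or removes functions. The types and names below are illustrative stand-ins, not the LLVM API.

  #include <map>

  struct CallCount {
    unsigned Direct;
    unsigned Indirect;
  };

  // Devirtualization signature: a function that survived the pass lost
  // indirect calls and gained direct ones.
  bool sawDevirtualization(const std::map<const void *, CallCount> &Old,
                           const std::map<const void *, CallCount> &New) {
    for (const auto &Pair : New) {
      auto It = Old.find(Pair.first);
      if (It == Old.end())
        continue; // Function is new; there is nothing to compare against.
      if (It->second.Indirect > Pair.second.Indirect &&
          It->second.Direct < Pair.second.Direct)
        return true;
    }
    return false;
  }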
diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h
index ca7abd34fea2..29921a51d5be 100644
--- a/include/llvm/Analysis/CaptureTracking.h
+++ b/include/llvm/Analysis/CaptureTracking.h
@@ -17,6 +17,7 @@ namespace llvm {
class Value;
class Use;
+ class DataLayout;
class Instruction;
class DominatorTree;
class OrderedBasicBlock;
@@ -83,6 +84,11 @@ namespace llvm {
/// use U. Return true to stop the traversal or false to continue looking
/// for more capturing instructions.
virtual bool captured(const Use *U) = 0;
+
+ /// isDereferenceableOrNull - Overload to allow clients with additional
+ /// knowledge about pointer dereferenceability to provide it and thereby
+ /// avoid conservative responses when a pointer is compared to null.
+ virtual bool isDereferenceableOrNull(Value *O, const DataLayout &DL);
};
/// PointerMayBeCaptured - Visit the value and the values derived from it and
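A hedged sketch of a client-side tracker using the new hook above: a tracker with extra knowledge that its pointers are dereferenceable can return true here, so comparisons against null need not be treated conservatively as captures. This is an illustration, not code from the patch.

  #include "llvm/Analysis/CaptureTracking.h"

  using namespace llvm;

  namespace {
  struct DerefAwareTracker : public CaptureTracker {
    bool Captured = false;

    void tooManyUses() override { Captured = true; }

    bool captured(const Use *U) override {
      Captured = true;
      return true; // Stop the traversal at the first capturing use.
    }

    // Assumed client invariant: every pointer we ask about is known
    // dereferenceable, so a null comparison cannot observe the address.
    bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
      return true;
    }
  };
  } // namespace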
diff --git a/include/llvm/Analysis/DDG.h b/include/llvm/Analysis/DDG.h
new file mode 100644
index 000000000000..0e1eb9d2cda3
--- /dev/null
+++ b/include/llvm/Analysis/DDG.h
@@ -0,0 +1,430 @@
+//===- llvm/Analysis/DDG.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Data-Dependence Graph (DDG).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DDG_H
+#define LLVM_ANALYSIS_DDG_H
+
+#include "llvm/ADT/DirectedGraph.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DependenceGraphBuilder.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/Instructions.h"
+#include <unordered_map>
+
+namespace llvm {
+class DDGNode;
+class DDGEdge;
+using DDGNodeBase = DGNode<DDGNode, DDGEdge>;
+using DDGEdgeBase = DGEdge<DDGNode, DDGEdge>;
+using DDGBase = DirectedGraph<DDGNode, DDGEdge>;
+class LPMUpdater;
+
+/// Data Dependence Graph Node
+/// The graph can represent the following types of nodes:
+/// 1. Single instruction node containing just one instruction.
+/// 2. Multiple instruction node where two or more instructions from
+/// the same basic block are merged into one node.
+/// 3. Root node is a special node that connects to all components such that
+/// there is always a path from it to any node in the graph.
+class DDGNode : public DDGNodeBase {
+public:
+ using InstructionListType = SmallVectorImpl<Instruction *>;
+
+ enum class NodeKind {
+ Unknown,
+ SingleInstruction,
+ MultiInstruction,
+ Root,
+ };
+
+ DDGNode() = delete;
+ DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {}
+ DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {}
+ DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {}
+ virtual ~DDGNode() = 0;
+
+ DDGNode &operator=(const DDGNode &N) {
+ DGNode::operator=(N);
+ Kind = N.Kind;
+ return *this;
+ }
+
+ DDGNode &operator=(DDGNode &&N) {
+ DGNode::operator=(std::move(N));
+ Kind = N.Kind;
+ return *this;
+ }
+
+ /// Getter for the kind of this node.
+ NodeKind getKind() const { return Kind; }
+
+ /// Collect a list of instructions, in \p IList, for which predicate \p Pred
+ /// evaluates to true when iterating over instructions of this node. Return
+ /// true if at least one instruction was collected, and false otherwise.
+ bool collectInstructions(llvm::function_ref<bool(Instruction *)> const &Pred,
+ InstructionListType &IList) const;
+
+protected:
+ /// Setter for the kind of this node.
+ void setKind(NodeKind K) { Kind = K; }
+
+private:
+ NodeKind Kind;
+};
+
+/// Subclass of DDGNode representing the root node of the graph.
+/// There should only be one such node in a given graph.
+class RootDDGNode : public DDGNode {
+public:
+ RootDDGNode() : DDGNode(NodeKind::Root) {}
+ RootDDGNode(const RootDDGNode &N) = delete;
+ RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {}
+ ~RootDDGNode() {}
+
+ /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+ static bool classof(const DDGNode *N) {
+ return N->getKind() == NodeKind::Root;
+ }
+ static bool classof(const RootDDGNode *N) { return true; }
+};
+
+/// Subclass of DDGNode representing single or multi-instruction nodes.
+class SimpleDDGNode : public DDGNode {
+public:
+ SimpleDDGNode() = delete;
+ SimpleDDGNode(Instruction &I);
+ SimpleDDGNode(const SimpleDDGNode &N);
+ SimpleDDGNode(SimpleDDGNode &&N);
+ ~SimpleDDGNode();
+
+ SimpleDDGNode &operator=(const SimpleDDGNode &N) {
+ DDGNode::operator=(N);
+ InstList = N.InstList;
+ return *this;
+ }
+
+ SimpleDDGNode &operator=(SimpleDDGNode &&N) {
+ DDGNode::operator=(std::move(N));
+ InstList = std::move(N.InstList);
+ return *this;
+ }
+
+ /// Get the list of instructions in this node.
+ const InstructionListType &getInstructions() const {
+ assert(!InstList.empty() && "Instruction List is empty.");
+ return InstList;
+ }
+ InstructionListType &getInstructions() {
+ return const_cast<InstructionListType &>(
+ static_cast<const SimpleDDGNode *>(this)->getInstructions());
+ }
+
+ /// Get the first/last instruction in the node.
+ Instruction *getFirstInstruction() const { return getInstructions().front(); }
+ Instruction *getLastInstruction() const { return getInstructions().back(); }
+
+ /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+ static bool classof(const DDGNode *N) {
+ return N->getKind() == NodeKind::SingleInstruction ||
+ N->getKind() == NodeKind::MultiInstruction;
+ }
+ static bool classof(const SimpleDDGNode *N) { return true; }
+
+private:
+ /// Append the list of instructions in \p Input to this node.
+ void appendInstructions(const InstructionListType &Input) {
+ setKind((InstList.size() == 0 && Input.size() == 1)
+ ? NodeKind::SingleInstruction
+ : NodeKind::MultiInstruction);
+ InstList.insert(InstList.end(), Input.begin(), Input.end());
+ }
+ void appendInstructions(const SimpleDDGNode &Input) {
+ appendInstructions(Input.getInstructions());
+ }
+
+ /// List of instructions associated with a single or multi-instruction node.
+ SmallVector<Instruction *, 2> InstList;
+};
+
+/// Data Dependency Graph Edge.
+/// An edge in the DDG can represent a def-use relationship or
+/// a memory dependence based on the result of DependenceAnalysis.
+/// A rooted edge connects the root node to one of the components
+/// of the graph.
+class DDGEdge : public DDGEdgeBase {
+public:
+ /// The kind of edge in the DDG
+ enum class EdgeKind { Unknown, RegisterDefUse, MemoryDependence, Rooted };
+
+ explicit DDGEdge(DDGNode &N) = delete;
+ DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {}
+ DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {}
+ DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {}
+ DDGEdge &operator=(const DDGEdge &E) {
+ DDGEdgeBase::operator=(E);
+ Kind = E.Kind;
+ return *this;
+ }
+
+ DDGEdge &operator=(DDGEdge &&E) {
+ DDGEdgeBase::operator=(std::move(E));
+ Kind = E.Kind;
+ return *this;
+ }
+
+ /// Get the edge kind
+ EdgeKind getKind() const { return Kind; }
+
+ /// Return true if this is a def-use edge, and false otherwise.
+ bool isDefUse() const { return Kind == EdgeKind::RegisterDefUse; }
+
+ /// Return true if this is a memory dependence edge, and false otherwise.
+ bool isMemoryDependence() const { return Kind == EdgeKind::MemoryDependence; }
+
+ /// Return true if this is an edge stemming from the root node, and false
+ /// otherwise.
+ bool isRooted() const { return Kind == EdgeKind::Rooted; }
+
+private:
+ EdgeKind Kind;
+};
+
+/// Encapsulate some common data and functionality needed for different
+/// variations of data dependence graphs.
+template <typename NodeType> class DependenceGraphInfo {
+public:
+ using DependenceList = SmallVector<std::unique_ptr<Dependence>, 1>;
+
+ DependenceGraphInfo() = delete;
+ DependenceGraphInfo(const DependenceGraphInfo &G) = delete;
+ DependenceGraphInfo(const std::string &N, const DependenceInfo &DepInfo)
+ : Name(N), DI(DepInfo), Root(nullptr) {}
+ DependenceGraphInfo(DependenceGraphInfo &&G)
+ : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {}
+ virtual ~DependenceGraphInfo() {}
+
+ /// Return the label that is used to name this graph.
+ const StringRef getName() const { return Name; }
+
+ /// Return the root node of the graph.
+ NodeType &getRoot() const {
+ assert(Root && "Root node is not available yet. Graph construction may "
+ "still be in progress\n");
+ return *Root;
+ }
+
+protected:
+ // Name of the graph.
+ std::string Name;
+
+ // Store a copy of DependenceInfo in the graph, so that individual memory
+ // dependencies don't need to be stored. Instead when the dependence is
+ // queried it is recomputed using @DI.
+ const DependenceInfo DI;
+
+ // A special node in the graph that has an edge to every connected component of
+ // the graph, to ensure all nodes are reachable in a graph walk.
+ NodeType *Root = nullptr;
+};
+
+using DDGInfo = DependenceGraphInfo<DDGNode>;
+
+/// Data Dependency Graph
+class DataDependenceGraph : public DDGBase, public DDGInfo {
+ friend class DDGBuilder;
+
+public:
+ using NodeType = DDGNode;
+ using EdgeType = DDGEdge;
+
+ DataDependenceGraph() = delete;
+ DataDependenceGraph(const DataDependenceGraph &G) = delete;
+ DataDependenceGraph(DataDependenceGraph &&G)
+ : DDGBase(std::move(G)), DDGInfo(std::move(G)) {}
+ DataDependenceGraph(Function &F, DependenceInfo &DI);
+ DataDependenceGraph(const Loop &L, DependenceInfo &DI);
+ ~DataDependenceGraph();
+
+protected:
+ /// Add node \p N to the graph, if it's not added yet, and keep track of
+ /// the root node. Return true if node is successfully added.
+ bool addNode(NodeType &N);
+
+};
+
+/// Concrete implementation of a pure data dependence graph builder. This class
+/// provides custom implementation for the pure-virtual functions used in the
+/// generic dependence graph build algorithm.
+///
+/// For information about time complexity of the build algorithm see the
+/// comments near the declaration of AbstractDependenceGraphBuilder.
+class DDGBuilder : public AbstractDependenceGraphBuilder<DataDependenceGraph> {
+public:
+ DDGBuilder(DataDependenceGraph &G, DependenceInfo &D,
+ const BasicBlockListType &BBs)
+ : AbstractDependenceGraphBuilder(G, D, BBs) {}
+ DDGNode &createRootNode() final override {
+ auto *RN = new RootDDGNode();
+ assert(RN && "Failed to allocate memory for DDG root node.");
+ Graph.addNode(*RN);
+ return *RN;
+ }
+ DDGNode &createFineGrainedNode(Instruction &I) final override {
+ auto *SN = new SimpleDDGNode(I);
+ assert(SN && "Failed to allocate memory for simple DDG node.");
+ Graph.addNode(*SN);
+ return *SN;
+ }
+ DDGEdge &createDefUseEdge(DDGNode &Src, DDGNode &Tgt) final override {
+ auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::RegisterDefUse);
+ assert(E && "Failed to allocate memory for edge");
+ Graph.connect(Src, Tgt, *E);
+ return *E;
+ }
+ DDGEdge &createMemoryEdge(DDGNode &Src, DDGNode &Tgt) final override {
+ auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::MemoryDependence);
+ assert(E && "Failed to allocate memory for edge");
+ Graph.connect(Src, Tgt, *E);
+ return *E;
+ }
+ DDGEdge &createRootedEdge(DDGNode &Src, DDGNode &Tgt) final override {
+ auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::Rooted);
+ assert(E && "Failed to allocate memory for edge");
+ assert(isa<RootDDGNode>(Src) && "Expected root node");
+ Graph.connect(Src, Tgt, *E);
+ return *E;
+ }
+
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N);
+raw_ostream &operator<<(raw_ostream &OS, const DDGNode::NodeKind K);
+raw_ostream &operator<<(raw_ostream &OS, const DDGEdge &E);
+raw_ostream &operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K);
+raw_ostream &operator<<(raw_ostream &OS, const DataDependenceGraph &G);
+
+//===--------------------------------------------------------------------===//
+// DDG Analysis Passes
+//===--------------------------------------------------------------------===//
+
+/// Analysis pass that builds the DDG for a loop.
+class DDGAnalysis : public AnalysisInfoMixin<DDGAnalysis> {
+public:
+ using Result = std::unique_ptr<DataDependenceGraph>;
+ Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
+
+private:
+ friend AnalysisInfoMixin<DDGAnalysis>;
+ static AnalysisKey Key;
+};
+
+/// Textual printer pass for the DDG of a loop.
+class DDGAnalysisPrinterPass : public PassInfoMixin<DDGAnalysisPrinterPass> {
+public:
+ explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+private:
+ raw_ostream &OS;
+};
+
+//===--------------------------------------------------------------------===//
+// GraphTraits specializations for the DDG
+//===--------------------------------------------------------------------===//
+
+/// Non-const versions of the graph trait specializations for the DDG.
+template <> struct GraphTraits<DDGNode *> {
+ using NodeRef = DDGNode *;
+
+ static DDGNode *DDGGetTargetNode(DGEdge<DDGNode, DDGEdge> *P) {
+ return &P->getTargetNode();
+ }
+
+ // Provide a mapped iterator so that the GraphTrait-based implementations can
+ // find the target nodes without having to explicitly go through the edges.
+ using ChildIteratorType =
+ mapped_iterator<DDGNode::iterator, decltype(&DDGGetTargetNode)>;
+ using ChildEdgeIteratorType = DDGNode::iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(N->end(), &DDGGetTargetNode);
+ }
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ return N->begin();
+ }
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<DataDependenceGraph *> : public GraphTraits<DDGNode *> {
+ using nodes_iterator = DataDependenceGraph::iterator;
+ static NodeRef getEntryNode(DataDependenceGraph *DG) {
+ return &DG->getRoot();
+ }
+ static nodes_iterator nodes_begin(DataDependenceGraph *DG) {
+ return DG->begin();
+ }
+ static nodes_iterator nodes_end(DataDependenceGraph *DG) { return DG->end(); }
+};
+
+/// Const versions of the graph trait specializations for the DDG.
+template <> struct GraphTraits<const DDGNode *> {
+ using NodeRef = const DDGNode *;
+
+ static const DDGNode *DDGGetTargetNode(const DGEdge<DDGNode, DDGEdge> *P) {
+ return &P->getTargetNode();
+ }
+
+ // Provide a mapped iterator so that the GraphTrait-based implementations can
+ // find the target nodes without having to explicitly go through the edges.
+ using ChildIteratorType =
+ mapped_iterator<DDGNode::const_iterator, decltype(&DDGGetTargetNode)>;
+ using ChildEdgeIteratorType = DDGNode::const_iterator;
+
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(N->end(), &DDGGetTargetNode);
+ }
+
+ static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+ return N->begin();
+ }
+ static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<const DataDependenceGraph *>
+ : public GraphTraits<const DDGNode *> {
+ using nodes_iterator = DataDependenceGraph::const_iterator;
+ static NodeRef getEntryNode(const DataDependenceGraph *DG) {
+ return &DG->getRoot();
+ }
+ static nodes_iterator nodes_begin(const DataDependenceGraph *DG) {
+ return DG->begin();
+ }
+ static nodes_iterator nodes_end(const DataDependenceGraph *DG) {
+ return DG->end();
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DDG_H
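A minimal usage sketch (not from the patch) for the header above: build a DDG for a loop and walk it through the GraphTraits specializations. It assumes a Loop and DependenceInfo obtained from the usual analyses; error handling is omitted.

  #include "llvm/ADT/DepthFirstIterator.h"
  #include "llvm/Analysis/DDG.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  void dumpLoopDDG(const Loop &L, DependenceInfo &DI, raw_ostream &OS) {
    DataDependenceGraph G(L, DI);
    // The root node has an edge to every connected component, so a DFS
    // from it reaches the whole graph.
    for (DDGNode *N : depth_first(&G.getRoot()))
      if (const auto *SN = dyn_cast<SimpleDDGNode>(N))
        OS << *SN << "\n";
  }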
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index 0410a3314659..c9e8df5db1c2 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -99,7 +99,7 @@ public:
errs() << "Writing '" << Filename << "'...";
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph);
std::string Title = GraphName + " for '" + F.getName().str() + "' function";
@@ -162,7 +162,7 @@ public:
errs() << "Writing '" << Filename << "'...";
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+ raw_fd_ostream File(Filename, EC, sys::fs::OF_Text);
std::string Title = DOTGraphTraits<GraphT>::getGraphName(Graph);
if (!EC)
diff --git a/include/llvm/Analysis/DependenceGraphBuilder.h b/include/llvm/Analysis/DependenceGraphBuilder.h
new file mode 100644
index 000000000000..5f4bdb47043b
--- /dev/null
+++ b/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -0,0 +1,119 @@
+//===- llvm/Analysis/DependenceGraphBuilder.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a builder interface that can be used to populate dependence
+// graphs such as DDG and PDG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+/// This abstract builder class defines a set of high-level steps for creating
+/// DDG-like graphs. The client code is expected to inherit from this class and
+/// define concrete implementation for each of the pure virtual functions used
+/// in the high-level algorithm.
+template <class GraphType> class AbstractDependenceGraphBuilder {
+protected:
+ using BasicBlockListType = SmallVectorImpl<BasicBlock *>;
+
+private:
+ using NodeType = typename GraphType::NodeType;
+ using EdgeType = typename GraphType::EdgeType;
+
+public:
+ using ClassesType = EquivalenceClasses<BasicBlock *>;
+ using NodeListType = SmallVector<NodeType *, 4>;
+
+ AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D,
+ const BasicBlockListType &BBs)
+ : Graph(G), DI(D), BBList(BBs) {}
+ virtual ~AbstractDependenceGraphBuilder() {}
+
+ /// The main entry to the graph construction algorithm. It starts by
+ /// creating nodes in increasing order of granularity and then
+ /// adds def-use and memory edges.
+ ///
+ /// The algorithmic complexity of this implementation is O(V^2 * I^2), where V
+/// is the number of vertices (nodes) and I is the number of instructions in
+/// each node. The total number of instructions, N, is equal to V * I,
+/// therefore the worst-case time complexity is O(N^2). The average case
+/// does roughly half of that work, which is still O(N^2).
+ void populate() {
+ createFineGrainedNodes();
+ createDefUseEdges();
+ createMemoryDependencyEdges();
+ createAndConnectRootNode();
+ }
+
+ /// Create fine grained nodes. These are typically atomic nodes that
+ /// consist of a single instruction.
+ void createFineGrainedNodes();
+
+ /// Analyze the def-use chains and create edges from the nodes containing
+ /// definitions to the nodes containing the uses.
+ void createDefUseEdges();
+
+ /// Analyze the data dependencies that exist between memory loads and stores
+ /// in the graph nodes, and create edges between them.
+ void createMemoryDependencyEdges();
+
+ /// Create a root node and add edges such that each node in the graph is
+ /// reachable from the root.
+ void createAndConnectRootNode();
+
+protected:
+ /// Create the root node of the graph.
+ virtual NodeType &createRootNode() = 0;
+
+ /// Create an atomic node in the graph given a single instruction.
+ virtual NodeType &createFineGrainedNode(Instruction &I) = 0;
+
+ /// Create a def-use edge going from \p Src to \p Tgt.
+ virtual EdgeType &createDefUseEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+ /// Create a memory dependence edge going from \p Src to \p Tgt.
+ virtual EdgeType &createMemoryEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+ /// Create a rooted edge going from \p Src to \p Tgt.
+ virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+ /// Deallocate memory of edge \p E.
+ virtual void destroyEdge(EdgeType &E) { delete &E; }
+
+ /// Deallocate memory of node \p N.
+ virtual void destroyNode(NodeType &N) { delete &N; }
+
+ /// Map types to map instructions to nodes used when populating the graph.
+ using InstToNodeMap = DenseMap<Instruction *, NodeType *>;
+
+ /// Reference to the graph that gets built by a concrete implementation of
+ /// this builder.
+ GraphType &Graph;
+
+ /// Dependence information used to create memory dependence edges in the
+ /// graph.
+ DependenceInfo &DI;
+
+ /// The list of basic blocks to consider when building the graph.
+ const BasicBlockListType &BBList;
+
+ /// A mapping from instructions to the corresponding nodes in the graph.
+ InstToNodeMap IMap;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
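For the abstract builder above, a sketch of how a client might drive the phases through the concrete DDGBuilder declared in llvm/Analysis/DDG.h; populate() runs them in the documented order: fine-grained nodes, def-use edges, memory edges, then the root node.

  #include "llvm/Analysis/DDG.h"

  using namespace llvm;

  // The graph and the basic-block list are owned by the caller; how the
  // blocks are collected (function body, loop blocks, ...) is up to the
  // client.
  void populateDDG(DataDependenceGraph &G, DependenceInfo &DI,
                   SmallVectorImpl<BasicBlock *> &BBs) {
    DDGBuilder(G, DI, BBs).populate();
  }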
diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h
index 3cfb9d13df94..2fac9c8b4b34 100644
--- a/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/include/llvm/Analysis/DivergenceAnalysis.h
@@ -73,9 +73,12 @@ public:
/// operands
bool isAlwaysUniform(const Value &Val) const;
- /// \brief Whether \p Val is a divergent value
+ /// \brief Whether \p Val is divergent at its definition.
bool isDivergent(const Value &Val) const;
+ /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent.
+ bool isDivergentUse(const Use &U) const;
+
void print(raw_ostream &OS, const Module *) const;
private:
@@ -189,12 +192,19 @@ public:
/// The GPU kernel this analysis result is for
const Function &getFunction() const { return DA.getFunction(); }
- /// Whether \p V is divergent.
+ /// Whether \p V is divergent at its definition.
bool isDivergent(const Value &V) const;
- /// Whether \p V is uniform/non-divergent
+ /// Whether \p U is divergent. Uses of a uniform value can be divergent.
+ bool isDivergentUse(const Use &U) const;
+
+ /// Whether \p V is uniform/non-divergent.
bool isUniform(const Value &V) const { return !isDivergent(V); }
+ /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
+ /// divergent.
+ bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
+
/// Print all divergent values in the kernel.
void print(raw_ostream &OS, const Module *) const;
};
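A small illustration (not from the patch) of why the value/use split matters: the definition and a particular use of the same value can disagree, so clients should query the use rather than assume uniformity from the definition.

  #include "llvm/Analysis/DivergenceAnalysis.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  void printDivergence(const DivergenceAnalysis &DA, const Use &U,
                       raw_ostream &OS) {
    const Value &V = *U.get();
    OS << "def: " << (DA.isDivergent(V) ? "divergent" : "uniform")
       << ", use: " << (DA.isDivergentUse(U) ? "divergent" : "uniform")
       << "\n";
  }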
diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h
index d3fcfc2d41ab..5d1c5a05206a 100644
--- a/include/llvm/Analysis/GlobalsModRef.h
+++ b/include/llvm/Analysis/GlobalsModRef.h
@@ -34,7 +34,7 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
class FunctionInfo;
const DataLayout &DL;
- const TargetLibraryInfo &TLI;
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
/// The globals that do not have their addresses taken.
SmallPtrSet<const GlobalValue *, 8> NonAddressTakenGlobals;
@@ -72,14 +72,18 @@ class GlobalsAAResult : public AAResultBase<GlobalsAAResult> {
/// could perform to the memory utilization here if this becomes a problem.
std::list<DeletionCallbackHandle> Handles;
- explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI);
+ explicit GlobalsAAResult(
+ const DataLayout &DL,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI);
public:
GlobalsAAResult(GlobalsAAResult &&Arg);
~GlobalsAAResult();
- static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI,
- CallGraph &CG);
+ static GlobalsAAResult
+ analyzeModule(Module &M,
+ std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
+ CallGraph &CG);
//------------------------------------------------
// Implement the AliasAnalysis API
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 054ffca7215e..a5ffca13046b 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -31,6 +31,7 @@
#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
@@ -141,6 +142,13 @@ Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q);
+/// Given operands for the multiplication of a FMA, fold the result or return
+/// null. In contrast to SimplifyFMulInst, this function will not perform
+/// simplifications whose unrounded results differ when rounded to the argument
+/// type.
+Value *SimplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF,
+ const SimplifyQuery &Q);
+
/// Given operands for a Mul, fold the result or return null.
Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q);
@@ -234,21 +242,19 @@ Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
/// Given operand for a UnaryOperator, fold the result or return null.
Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q);
-/// Given operand for an FP UnaryOperator, fold the result or return null.
-/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyUnOp.
-Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
- const SimplifyQuery &Q);
+/// Given operand for a UnaryOperator, fold the result or return null.
+/// Try to use FastMathFlags when folding the result.
+Value *SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q);
/// Given operands for a BinaryOperator, fold the result or return null.
Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
-/// Given operands for an FP BinaryOperator, fold the result or return null.
-/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
-/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
-Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- FastMathFlags FMF, const SimplifyQuery &Q);
+/// Given operands for a BinaryOperator, fold the result or return null.
+/// Try to use FastMathFlags when folding the result.
+Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ FastMathFlags FMF, const SimplifyQuery &Q);
/// Given a callsite, fold the result or return null.
Value *SimplifyCall(CallBase *Call, const SimplifyQuery &Q);
@@ -263,12 +269,14 @@ Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
/// This first performs a normal RAUW of I with SimpleV. It then recursively
/// attempts to simplify those users updated by the operation. The 'I'
/// instruction must not be equal to the simplified value 'SimpleV'.
+/// If UnsimplifiedUsers is provided, instructions that could not be simplified
+/// are added to it.
///
/// The function returns true if any simplifications were performed.
-bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
- const TargetLibraryInfo *TLI = nullptr,
- const DominatorTree *DT = nullptr,
- AssumptionCache *AC = nullptr);
+bool replaceAndRecursivelySimplify(
+ Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr,
+ const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr,
+ SmallSetVector<Instruction *, 8> *UnsimplifiedUsers = nullptr);
/// Recursively attempt to simplify an instruction.
///
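A short usage sketch for the extended overload above: collect the users that resisted simplification so the caller can revisit them later. The surrounding transform is assumed to supply I and SimpleV.

  #include "llvm/ADT/SetVector.h"
  #include "llvm/Analysis/InstructionSimplify.h"

  using namespace llvm;

  void replaceAndTrack(Instruction *I, Value *SimpleV) {
    SmallSetVector<Instruction *, 8> UnsimplifiedUsers;
    replaceAndRecursivelySimplify(I, SimpleV, /*TLI=*/nullptr, /*DT=*/nullptr,
                                  /*AC=*/nullptr, &UnsimplifiedUsers);
    // Users that were rewritten but not folded away; queue them for any
    // follow-up processing the client needs.
    for (Instruction *U : UnsimplifiedUsers)
      (void)U;
  }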
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index 2d83929211e2..20a35bef189b 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -931,7 +931,8 @@ public:
/// This sets up the graph and computes all of the entry points of the graph.
/// No function definitions are scanned until their nodes in the graph are
/// requested during traversal.
- LazyCallGraph(Module &M, TargetLibraryInfo &TLI);
+ LazyCallGraph(Module &M,
+ function_ref<TargetLibraryInfo &(Function &)> GetTLI);
LazyCallGraph(LazyCallGraph &&G);
LazyCallGraph &operator=(LazyCallGraph &&RHS);
@@ -1267,7 +1268,12 @@ public:
/// This just builds the set of entry points to the call graph. The rest is
/// built lazily as it is walked.
LazyCallGraph run(Module &M, ModuleAnalysisManager &AM) {
- return LazyCallGraph(M, AM.getResult<TargetLibraryAnalysis>(M));
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+ return LazyCallGraph(M, GetTLI);
}
};
diff --git a/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
index 0a338b816640..e33b8f4129f3 100644
--- a/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ b/include/llvm/Analysis/LegacyDivergenceAnalysis.h
@@ -39,17 +39,18 @@ public:
void print(raw_ostream &OS, const Module *) const override;
// Returns true if V is divergent at its definition.
- //
- // Even if this function returns false, V may still be divergent when used
- // in a different basic block.
bool isDivergent(const Value *V) const;
+ // Returns true if U is divergent. Uses of a uniform value can be divergent.
+ bool isDivergentUse(const Use *U) const;
+
// Returns true if V is uniform/non-divergent.
- //
- // Even if this function returns true, V may still be divergent when used
- // in a different basic block.
bool isUniform(const Value *V) const { return !isDivergent(V); }
+ // Returns true if U is uniform/non-divergent. Uses of a uniform value can be
+ // divergent.
+ bool isUniformUse(const Use *U) const { return !isDivergentUse(U); }
+
// Keep the analysis results up to date by removing an erased value.
void removeValue(const Value *V) { DivergentValues.erase(V); }
@@ -62,6 +63,9 @@ private:
// Stores all divergent values.
DenseSet<const Value *> DivergentValues;
+
+ // Stores divergent uses of possibly uniform values.
+ DenseSet<const Use *> DivergentUses;
};
} // End llvm namespace
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index 5df6bb02308d..9604b2521e89 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -20,7 +20,9 @@
namespace llvm {
class DataLayout;
+class Loop;
class MDNode;
+class ScalarEvolution;
/// Return true if this is always a dereferenceable pointer. If the context
/// instruction is specified perform context-sensitive analysis and return true
@@ -35,7 +37,8 @@ bool isDereferenceablePointer(const Value *V, Type *Ty,
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
- unsigned Align, const DataLayout &DL,
+ MaybeAlign Alignment,
+ const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -43,7 +46,7 @@ bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
/// greater or equal than requested. If the context instruction is specified
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
-bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+bool isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
const APInt &Size, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -56,11 +59,22 @@ bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
/// If it is not obviously safe to load from the specified pointer, we do a
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+bool isSafeToLoadUnconditionally(Value *V, MaybeAlign Alignment, APInt &Size,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
+/// Return true if we can prove that the given load (which is assumed to be
+/// within the specified loop) would access only dereferenceable memory, and
+/// be properly aligned on every iteration of the specified loop regardless of
+/// its placement within the loop. (i.e. does not require predication beyond
+/// that required by the header itself and could be hoisted into the header
+/// if desired.) This is more powerful than the variants above when the
+/// address loaded from is analyzable by SCEV.
+bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
+ ScalarEvolution &SE,
+ DominatorTree &DT);
+
/// Return true if we know that executing a load from this value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
@@ -69,7 +83,7 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
/// If it is not obviously safe to load from the specified pointer, we do a
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
+bool isSafeToLoadUnconditionally(Value *V, Type *Ty, MaybeAlign Alignment,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
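A hedged sketch of using the new loop-aware query above, for instance to decide whether a load may be hoisted or speculated without predication. The load, loop, and analyses are assumed to come from the surrounding pass.

  #include "llvm/Analysis/Loads.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Analysis/ScalarEvolution.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  bool canHoistLoad(LoadInst *LI, Loop *L, ScalarEvolution &SE,
                    DominatorTree &DT) {
    // True only if the address is dereferenceable and aligned on every
    // iteration, wherever the load sits inside the loop body.
    return isDereferenceableAndAlignedInLoop(LI, L, SE, DT);
  }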
diff --git a/include/llvm/Analysis/LoopAnalysisManager.h b/include/llvm/Analysis/LoopAnalysisManager.h
index 368a810cfa67..a2e65a7310af 100644
--- a/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/include/llvm/Analysis/LoopAnalysisManager.h
@@ -86,8 +86,9 @@ typedef InnerAnalysisManagerProxy<LoopAnalysisManager, Function>
template <> class LoopAnalysisManagerFunctionProxy::Result {
public:
explicit Result(LoopAnalysisManager &InnerAM, LoopInfo &LI)
- : InnerAM(&InnerAM), LI(&LI) {}
- Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI) {
+ : InnerAM(&InnerAM), LI(&LI), MSSAUsed(false) {}
+ Result(Result &&Arg)
+ : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) {
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibility to clear the
// analysis state.
@@ -96,6 +97,7 @@ public:
Result &operator=(Result &&RHS) {
InnerAM = RHS.InnerAM;
LI = RHS.LI;
+ MSSAUsed = RHS.MSSAUsed;
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibility to clear the
// analysis state.
@@ -112,6 +114,9 @@ public:
InnerAM->clear();
}
+ /// Mark MemorySSA as used so we can invalidate self if MSSA is invalidated.
+ void markMSSAUsed() { MSSAUsed = true; }
+
/// Accessor for the analysis manager.
LoopAnalysisManager &getManager() { return *InnerAM; }
@@ -130,6 +135,7 @@ public:
private:
LoopAnalysisManager *InnerAM;
LoopInfo *LI;
+ bool MSSAUsed;
};
/// Provide a specialized run method for the \c LoopAnalysisManagerFunctionProxy
diff --git a/include/llvm/Analysis/LoopCacheAnalysis.h b/include/llvm/Analysis/LoopCacheAnalysis.h
new file mode 100644
index 000000000000..ffec78b6db2c
--- /dev/null
+++ b/include/llvm/Analysis/LoopCacheAnalysis.h
@@ -0,0 +1,281 @@
+//===- llvm/Analysis/LoopCacheAnalysis.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the interface for the loop cache analysis.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
+#define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class LPMUpdater;
+using CacheCostTy = int64_t;
+using LoopVectorTy = SmallVector<Loop *, 8>;
+
+/// Represents a memory reference as a base pointer and a set of indexing
+/// operations. For example given the array reference A[i][2j+1][3k+2] in a
+/// 3-dim loop nest:
+/// for(i=0;i<n;++i)
+/// for(j=0;j<m;++j)
+/// for(k=0;k<o;++k)
+/// ... A[i][2j+1][3k+2] ...
+/// We expect:
+/// BasePointer -> A
+/// Subscripts -> [{0,+,1}<%for.i>][{1,+,2}<%for.j>][{2,+,3}<%for.k>]
+/// Sizes -> [m][o][4]
+class IndexedReference {
+ friend raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
+
+public:
+ /// Construct an indexed reference given a \p StoreOrLoadInst instruction.
+ IndexedReference(Instruction &StoreOrLoadInst, const LoopInfo &LI,
+ ScalarEvolution &SE);
+
+ bool isValid() const { return IsValid; }
+ const SCEV *getBasePointer() const { return BasePointer; }
+ size_t getNumSubscripts() const { return Subscripts.size(); }
+ const SCEV *getSubscript(unsigned SubNum) const {
+ assert(SubNum < getNumSubscripts() && "Invalid subscript number");
+ return Subscripts[SubNum];
+ }
+ const SCEV *getFirstSubscript() const {
+ assert(!Subscripts.empty() && "Expecting non-empty container");
+ return Subscripts.front();
+ }
+ const SCEV *getLastSubscript() const {
+ assert(!Subscripts.empty() && "Expecting non-empty container");
+ return Subscripts.back();
+ }
+
+ /// Return true/false if the current object and the indexed reference \p Other
+ /// are/aren't in the same cache line of size \p CLS. Two references are in
+ /// the same cache line iff the distance between them in the innermost
+ /// dimension is less than the cache line size. Return None if unsure.
+ Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS,
+ AliasAnalysis &AA) const;
+
+ /// Return true if the current object and the indexed reference \p Other
+ /// have distance smaller than \p MaxDistance in the dimension associated with
+ /// the given loop \p L. Return false if the distance is not smaller than \p
+ /// MaxDistance and None if unsure.
+ Optional<bool> hasTemporalReuse(const IndexedReference &Other,
+ unsigned MaxDistance, const Loop &L,
+ DependenceInfo &DI, AliasAnalysis &AA) const;
+
+ /// Compute the cost of the reference w.r.t. the given loop \p L when it is
+ /// considered in the innermost position in the loop nest.
+ /// The cost is defined as:
+ /// - equal to one if the reference is loop invariant, or
+ /// - equal to '(TripCount * stride) / cache_line_size' if:
+ /// + the reference stride is less than the cache line size, and
+ /// + the coefficient of this loop's index variable used in all other
+ /// subscripts is zero
+ /// - or otherwise equal to 'TripCount'.
+ CacheCostTy computeRefCost(const Loop &L, unsigned CLS) const;
+
+private:
+ /// Attempt to delinearize the indexed reference.
+ bool delinearize(const LoopInfo &LI);
+
+ /// Return true if the indexed reference is invariant with respect to loop \p L.
+ bool isLoopInvariant(const Loop &L) const;
+
+ /// Return true if the indexed reference is 'consecutive' in loop \p L.
+ /// An indexed reference is 'consecutive' if the only coefficient that uses
+ /// the loop induction variable is the rightmost one, and the access stride is
+ /// smaller than the cache line size \p CLS.
+ bool isConsecutive(const Loop &L, unsigned CLS) const;
+
+ /// Return the coefficient used in the rightmost dimension.
+ const SCEV *getLastCoefficient() const;
+
+ /// Return true if the coefficient corresponding to induction variable of
+ /// loop \p L in the given \p Subscript is zero or is loop invariant in \p L.
+ bool isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
+ const Loop &L) const;
+
+ /// Verify that the given \p Subscript is 'well formed' (must be a simple add
+ /// recurrence).
+ bool isSimpleAddRecurrence(const SCEV &Subscript, const Loop &L) const;
+
+ /// Return true if the given reference \p Other is definitely aliased with
+ /// the indexed reference represented by this class.
+ bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const;
+
+private:
+ /// True if the reference can be delinearized, false otherwise.
+ bool IsValid = false;
+
+ /// Represent the memory reference instruction.
+ Instruction &StoreOrLoadInst;
+
+ /// The base pointer of the memory reference.
+ const SCEV *BasePointer = nullptr;
+
+ /// The subscript (indexes) of the memory reference.
+ SmallVector<const SCEV *, 3> Subscripts;
+
+ /// The dimensions of the memory reference.
+ SmallVector<const SCEV *, 3> Sizes;
+
+ ScalarEvolution &SE;
+};
+
+/// A reference group represents a set of memory references that exhibit
+/// temporal or spatial reuse. Two references belong to the same
+/// reference group with respect to an inner loop L iff:
+/// 1. they have a loop independent dependency, or
+/// 2. they have a loop carried dependence with a small dependence distance
+/// (e.g. less than 2) carried by the inner loop, or
+/// 3. they refer to the same array, and the subscript in their innermost
+/// dimension is less than or equal to 'd' (where 'd' is less than the cache
+/// line size)
+///
+/// Intuitively a reference group represents memory references that access
+/// the same cache line. Conditions 1,2 above account for temporal reuse, while
+/// condition 3 accounts for spatial reuse.
+using ReferenceGroupTy = SmallVector<std::unique_ptr<IndexedReference>, 8>;
+using ReferenceGroupsTy = SmallVector<ReferenceGroupTy, 8>;
+
+/// \c CacheCost represents the estimated cost of an inner loop as the number of
+/// cache lines used by the memory references it contains.
+/// The 'cache cost' of a loop 'L' in a loop nest 'LN' is computed as the sum of
+/// the cache costs of all of its reference groups when the loop is considered
+/// to be in the innermost position in the nest.
+/// A reference group represents memory references that fall into the same cache
+/// line. Each reference group is analysed with respect to the innermost loop in
+/// a loop nest. The cost of a reference is defined as follows:
+/// - one if it is loop invariant w.r.t the innermost loop,
+/// - equal to the loop trip count divided by the cache line times the
+/// reference stride if the reference stride is less than the cache line
+/// size (CLS), and the coefficient of this loop's index variable used in all
+/// other subscripts is zero (e.g. RefCost = TripCount/(CLS/RefStride))
+/// - equal to the innermost loop trip count if the reference stride is greater
+/// or equal to the cache line size CLS.
+class CacheCost {
+ friend raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
+ using LoopTripCountTy = std::pair<const Loop *, unsigned>;
+ using LoopCacheCostTy = std::pair<const Loop *, CacheCostTy>;
+
+public:
+ static CacheCostTy constexpr InvalidCost = -1;
+
+ /// Construct a CacheCost object for the loop nest described by \p Loops.
+ /// The optional parameter \p TRT can be used to specify the max. distance
+ /// between array elements accessed in a loop so that the elements are
+ /// classified to have temporal reuse.
+ CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE,
+ TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI,
+ Optional<unsigned> TRT = None);
+
+ /// Create a CacheCost for the loop nest rooted by \p Root.
+ /// The optional parameter \p TRT can be used to specify the max. distance
+ /// between array elements accessed in a loop so that the elements are
+ /// classified to have temporal reuse.
+ static std::unique_ptr<CacheCost>
+ getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR, DependenceInfo &DI,
+ Optional<unsigned> TRT = None);
+
+ /// Return the estimated cost of loop \p L if the given loop is part of the
+ /// loop nest associated with this object. Return -1 otherwise.
+ CacheCostTy getLoopCost(const Loop &L) const {
+ auto IT = std::find_if(
+ LoopCosts.begin(), LoopCosts.end(),
+ [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; });
+ return (IT != LoopCosts.end()) ? (*IT).second : -1;
+ }
+
+ /// Return the estimated ordered loop costs.
+ const ArrayRef<LoopCacheCostTy> getLoopCosts() const { return LoopCosts; }
+
+private:
+ /// Calculate the cache footprint of each loop in the nest (when it is
+ /// considered to be in the innermost position).
+ void calculateCacheFootprint();
+
+ /// Partition store/load instructions in the loop nest into reference groups.
+ /// Two or more memory accesses belong in the same reference group if they
+ /// share the same cache line.
+ bool populateReferenceGroups(ReferenceGroupsTy &RefGroups) const;
+
+ /// Calculate the cost of the given loop \p L assuming it is the innermost
+ /// loop in nest.
+ CacheCostTy computeLoopCacheCost(const Loop &L,
+ const ReferenceGroupsTy &RefGroups) const;
+
+ /// Compute the cost of a representative reference in reference group \p RG
+ /// when the given loop \p L is considered as the innermost loop in the nest.
+ /// The computed cost is an estimate for the number of cache lines used by the
+ /// reference group. The representative reference cost is defined as:
+ /// - equal to one if the reference is loop invariant, or
+ /// - equal to '(TripCount * stride) / cache_line_size' if (a) loop \p L's
+ /// induction variable is used only in the reference subscript associated
+ /// with loop \p L, and (b) the reference stride is less than the cache
+ /// line size, or
+ /// - TripCount otherwise
+ CacheCostTy computeRefGroupCacheCost(const ReferenceGroupTy &RG,
+ const Loop &L) const;
+
+ /// Sort the LoopCosts vector by decreasing cache cost.
+ void sortLoopCosts() {
+ sort(LoopCosts, [](const LoopCacheCostTy &A, const LoopCacheCostTy &B) {
+ return A.second > B.second;
+ });
+ }
+
+private:
+ /// Loops in the loop nest associated with this object.
+ LoopVectorTy Loops;
+
+ /// Trip counts for the loops in the loop nest associated with this object.
+ SmallVector<LoopTripCountTy, 3> TripCounts;
+
+ /// Cache costs for the loops in the loop nest associated with this object.
+ SmallVector<LoopCacheCostTy, 3> LoopCosts;
+
+ /// The max. distance between array elements accessed in a loop so that the
+ /// elements are classified to have temporal reuse.
+ Optional<unsigned> TRT;
+
+ const LoopInfo &LI;
+ ScalarEvolution &SE;
+ TargetTransformInfo &TTI;
+ AliasAnalysis &AA;
+ DependenceInfo &DI;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const IndexedReference &R);
+raw_ostream &operator<<(raw_ostream &OS, const CacheCost &CC);
+
+/// Printer pass for the \c CacheCost results.
+class LoopCachePrinterPass : public PassInfoMixin<LoopCachePrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit LoopCachePrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_LOOPCACHEANALYSIS_H
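A minimal sketch of querying the analysis above from a loop pass: build the cache cost for the nest rooted at Root and read back the per-loop ordering. AR and DI are assumed to be provided by the pass manager.

  #include "llvm/Analysis/LoopCacheAnalysis.h"

  using namespace llvm;

  void reportCacheCosts(Loop &Root, LoopStandardAnalysisResults &AR,
                        DependenceInfo &DI, raw_ostream &OS) {
    std::unique_ptr<CacheCost> CC = CacheCost::getCacheCost(Root, AR, DI);
    if (!CC)
      return; // The nest could not be analyzed.
    // Costs are sorted in decreasing order: the first loop is the one
    // that would touch the most cache lines if placed innermost.
    for (const auto &LC : CC->getLoopCosts())
      OS << LC.first->getName() << ": " << LC.second << "\n";
  }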
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 584eb3a8c854..abf3863b0601 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -30,6 +30,9 @@
// instance. In particular, a Loop might be inside such a non-loop SCC, or a
// non-loop SCC might contain a sub-SCC which is a Loop.
//
+// For an overview of terminology used in this API (and thus all of our loop
+// analyses or transforms), see docs/LoopTerminology.rst.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_LOOPINFO_H
@@ -570,9 +573,9 @@ public:
bool getIncomingAndBackEdge(BasicBlock *&Incoming,
BasicBlock *&Backedge) const;
- /// Below are some utilities to get loop bounds and induction variable, and
- /// check if a given phinode is an auxiliary induction variable, as well as
- /// checking if the loop is canonical.
+ /// Below are some utilities to get the loop guard, loop bounds and induction
+ /// variable, and to check if a given phinode is an auxiliary induction
+ /// variable, if the loop is guarded, and if the loop is canonical.
///
/// Here is an example:
/// \code
@@ -604,6 +607,9 @@ public:
///
/// - getInductionVariable --> i_1
/// - isAuxiliaryInductionVariable(x) --> true if x == i_1
+ /// - getLoopGuardBranch()
+ /// --> `if (guardcmp) goto preheader; else goto afterloop`
+ /// - isGuarded() --> true
/// - isCanonical --> false
struct LoopBounds {
/// Return the LoopBounds object if
@@ -725,6 +731,31 @@ public:
bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
ScalarEvolution &SE) const;
+ /// Return the loop guard branch, if it exists.
+ ///
+  /// This currently only works on simplified loops, as it requires a preheader
+ /// and a latch to identify the guard. It will work on loops of the form:
+ /// \code
+ /// GuardBB:
+ /// br cond1, Preheader, ExitSucc <== GuardBranch
+ /// Preheader:
+ /// br Header
+ /// Header:
+ /// ...
+ /// br Latch
+ /// Latch:
+ /// br cond2, Header, ExitBlock
+ /// ExitBlock:
+ /// br ExitSucc
+ /// ExitSucc:
+ /// \endcode
+ BranchInst *getLoopGuardBranch() const;
+
+ /// Return true iff the loop is
+  /// - in simplified and rotated form, and
+ /// - guarded by a loop guard branch.
+ bool isGuarded() const { return (getLoopGuardBranch() != nullptr); }
+
/// Return true if the loop induction variable starts at zero and increments
/// by one each time through the loop.
bool isCanonical(ScalarEvolution &SE) const;
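
A short sketch of the new guard query in a transform; Setup stands in for a
hypothetical loop-invariant instruction:

    // Only hoist above the guard when the guarded shape shown above was
    // actually recognized.
    if (BranchInst *Guard = L->getLoopGuardBranch()) {
      assert(L->isGuarded() && "guard branch implies a guarded loop");
      Setup->moveBefore(Guard); // hypothetical loop-invariant setup code
    }
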
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 4c33dac9e21e..8b11e848a195 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -85,9 +85,9 @@ template <class BlockT, class LoopT>
bool LoopBase<BlockT, LoopT>::hasDedicatedExits() const {
// Each predecessor of each exit block of a normal loop is contained
// within the loop.
- SmallVector<BlockT *, 4> ExitBlocks;
- getExitBlocks(ExitBlocks);
- for (BlockT *EB : ExitBlocks)
+ SmallVector<BlockT *, 4> UniqueExitBlocks;
+ getUniqueExitBlocks(UniqueExitBlocks);
+ for (BlockT *EB : UniqueExitBlocks)
for (BlockT *Predecessor : children<Inverse<BlockT *>>(EB))
if (!contains(Predecessor))
return false;
@@ -200,8 +200,6 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const {
}
}
- // Make sure there is only one exit out of the preheader.
- assert(Out && "Header of loop has no predecessors from outside loop?");
return Out;
}
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 49f9e58ffad7..a89d76b9e5bd 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -58,6 +58,9 @@ class Value;
/// like).
bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
+bool isAllocationFn(const Value *V,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast = false);
/// Tests if a value is a call or invoke to a function that returns a
/// NoAlias pointer (including malloc/calloc/realloc/strdup-like functions).
@@ -68,6 +71,9 @@ bool isNoAliasFn(const Value *V, const TargetLibraryInfo *TLI,
/// allocates uninitialized memory (such as malloc).
bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
+bool isMallocLikeFn(const Value *V,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
+ bool LookThroughBitCast = false);
/// Tests if a value is a call or invoke to a library function that
/// allocates zero-filled memory (such as calloc).
@@ -93,6 +99,16 @@ bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
/// reallocates memory (e.g., realloc).
bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory and throws if an allocation failed (e.g., new).
+bool isOpNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false);
+
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory (strdup, strndup).
+bool isStrdupLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false);
+
//===----------------------------------------------------------------------===//
// malloc Call Utility Functions.
//
@@ -100,9 +116,13 @@ bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
-const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI);
-inline CallInst *extractMallocCall(Value *I, const TargetLibraryInfo *TLI) {
- return const_cast<CallInst*>(extractMallocCall((const Value*)I, TLI));
+const CallInst *
+extractMallocCall(const Value *I,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
+inline CallInst *
+extractMallocCall(Value *I,
+ function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
+ return const_cast<CallInst *>(extractMallocCall((const Value *)I, GetTLI));
}
/// getMallocType - Returns the PointerType resulting from the malloc call.
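
A sketch of the callback-based overloads from a legacy pass; the lambda shape
is the intended usage, while the surrounding pass and visitMallocSite are
assumed:

    auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
      return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    };
    if (isAllocationFn(V, GetTLI))
      if (CallInst *MC = extractMallocCall(const_cast<Value *>(V), GetTLI))
        visitMallocSite(MC); // hypothetical handler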
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index e2669c2fa601..e89e5690fad0 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -362,11 +362,14 @@ private:
PhiValues &PV;
PredIteratorCache PredCache;
+ unsigned DefaultBlockScanLimit;
+
public:
MemoryDependenceResults(AliasAnalysis &AA, AssumptionCache &AC,
- const TargetLibraryInfo &TLI,
- DominatorTree &DT, PhiValues &PV)
- : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV) {}
+ const TargetLibraryInfo &TLI, DominatorTree &DT,
+ PhiValues &PV, unsigned DefaultBlockScanLimit)
+ : AA(AA), AC(AC), TLI(TLI), DT(DT), PV(PV),
+ DefaultBlockScanLimit(DefaultBlockScanLimit) {}
/// Handle invalidation in the new PM.
bool invalidate(Function &F, const PreservedAnalyses &PA,
@@ -511,9 +514,14 @@ class MemoryDependenceAnalysis
static AnalysisKey Key;
+ unsigned DefaultBlockScanLimit;
+
public:
using Result = MemoryDependenceResults;
+ MemoryDependenceAnalysis();
+  MemoryDependenceAnalysis(unsigned DefaultBlockScanLimit)
+      : DefaultBlockScanLimit(DefaultBlockScanLimit) {}
+
MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM);
};
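
With the extra constructor a pipeline can override the scan limit; a minimal
sketch, with the limit value chosen arbitrarily:

    FunctionAnalysisManager FAM;
    // Scan at most 500 instructions per block instead of the built-in
    // default.
    FAM.registerPass([] {
      return MemoryDependenceAnalysis(/*DefaultBlockScanLimit=*/500);
    });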
diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h
index b7730be75354..e89bf26a7234 100644
--- a/include/llvm/Analysis/MemorySSA.h
+++ b/include/llvm/Analysis/MemorySSA.h
@@ -793,6 +793,7 @@ protected:
friend class MemorySSAPrinterLegacyPass;
friend class MemorySSAUpdater;
+ void verifyPrevDefInPhis(Function &F) const;
void verifyDefUses(Function &F) const;
void verifyDomination(Function &F) const;
void verifyOrdering(Function &F) const;
@@ -830,7 +831,8 @@ protected:
void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
AccessList::iterator);
MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *,
- const MemoryUseOrDef *Template = nullptr);
+ const MemoryUseOrDef *Template = nullptr,
+ bool CreationMustSucceed = true);
private:
template <class AliasAnalysisType> class ClobberWalkerBase;
diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h
index d4d8040c1ff6..1d34663721e3 100644
--- a/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/include/llvm/Analysis/MemorySSAUpdater.h
@@ -99,7 +99,7 @@ public:
/// load a
/// Where a mayalias b, *does* require RenameUses be set to true.
void insertDef(MemoryDef *Def, bool RenameUses = false);
- void insertUse(MemoryUse *Use);
+ void insertUse(MemoryUse *Use, bool RenameUses = false);
/// Update the MemoryPhi in `To` following an edge deletion between `From` and
/// `To`. If `To` becomes unreachable, a call to removeBlocks should be made.
void removeEdge(BasicBlock *From, BasicBlock *To);
@@ -275,6 +275,7 @@ private:
getPreviousDefRecursive(BasicBlock *,
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
MemoryAccess *recursePhi(MemoryAccess *Phi);
+ MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi);
template <class RangeType>
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
void tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs);
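
A sketch of the new RenameUses parameter, assuming a transform that has just
cloned a load NewLoad into block BB while keeping MemorySSA MSSA up to date:

    MemorySSAUpdater MSSAU(&MSSA);
    MemoryAccess *NewAccess = MSSAU.createMemoryAccessInBB(
        NewLoad, /*Definition=*/nullptr, BB, MemorySSA::End);
    // Inserted uses can now request renaming, mirroring insertDef.
    MSSAU.insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true);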
diff --git a/include/llvm/Analysis/MustExecute.h b/include/llvm/Analysis/MustExecute.h
index 3ef539c89d97..87cf9f85c7f1 100644
--- a/include/llvm/Analysis/MustExecute.h
+++ b/include/llvm/Analysis/MustExecute.h
@@ -7,10 +7,17 @@
//===----------------------------------------------------------------------===//
/// \file
/// Contains a collection of routines for determining if a given instruction is
-/// guaranteed to execute if a given point in control flow is reached. The most
+/// guaranteed to execute if a given point in control flow is reached. The most
/// common example is an instruction within a loop being provably executed if we
 /// branch to the header of its containing loop.
///
+/// There are two interfaces available to determine if an instruction is
+/// executed once a given point in the control flow is reached:
+/// 1) A loop-centric one derived from LoopSafetyInfo.
+/// 2) A "must be executed context"-based one implemented in the
+/// MustBeExecutedContextExplorer.
+/// Please refer to the class comments for more information.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
@@ -164,6 +171,280 @@ public:
virtual ~ICFLoopSafetyInfo() {};
};
-}
+struct MustBeExecutedContextExplorer;
+
+/// Must be executed iterators visit stretches of instructions that are
+/// guaranteed to be executed together, potentially with other instructions
+/// executed in between.
+///
+/// Given the following code, and assuming all statements are single
+/// instructions which transfer execution to the successor (see
+/// isGuaranteedToTransferExecutionToSuccessor), there are two possible
+/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
+/// and E. If we start at C or D, we will visit all instructions A-E.
+///
+/// \code
+/// A;
+/// B;
+/// if (...) {
+/// C;
+/// D;
+/// }
+/// E;
+/// \endcode
+///
+///
+/// Below is the example extended with instructions F and G. Now we assume F
+/// might not transfer execution to its successor G. As a result we get the
+/// following visit sets:
+///
+/// Start Instruction | Visit Set
+/// A | A, B, E, F
+/// B | A, B, E, F
+/// C | A, B, C, D, E, F
+/// D | A, B, C, D, E, F
+/// E | A, B, E, F
+/// F | A, B, E, F
+/// G | A, B, E, F, G
+///
+///
+/// \code
+/// A;
+/// B;
+/// if (...) {
+/// C;
+/// D;
+/// }
+/// E;
+/// F; // Might not transfer execution to its successor G.
+/// G;
+/// \endcode
+///
+///
+/// A more complex example involving conditionals, loops, break, and continue
+/// is shown below. We again assume all instructions will transfer execution to
+/// the successor and we assume we can prove the inner loop to be finite. We
+/// omit non-trivial branch conditions as the exploration is oblivious to them.
+/// Constant branches are assumed to be unconditional in the CFG. The resulting
+/// visit sets are shown in the table below.
+///
+/// \code
+/// A;
+/// while (true) {
+/// B;
+/// if (...)
+/// C;
+/// if (...)
+/// continue;
+/// D;
+/// if (...)
+/// break;
+/// do {
+/// if (...)
+/// continue;
+/// E;
+/// } while (...);
+/// F;
+/// }
+/// G;
+/// \endcode
+///
+/// Start Instruction | Visit Set
+/// A | A, B
+/// B | A, B
+/// C | A, B, C
+/// D | A, B, D
+/// E | A, B, D, E, F
+/// F | A, B, D, F
+/// G | A, B, D, G
+///
+///
+/// Note that the examples show optimal visit sets but not necessarily the ones
+/// derived by the explorer, depending on the available CFG analyses (see
+/// MustBeExecutedContextExplorer). Also note that, depending on the options,
+/// the visit set can contain instructions from other functions.
+struct MustBeExecutedIterator {
+  /// Type declarations that make this class an input iterator.
+ ///{
+ typedef const Instruction *value_type;
+ typedef std::ptrdiff_t difference_type;
+ typedef const Instruction **pointer;
+ typedef const Instruction *&reference;
+ typedef std::input_iterator_tag iterator_category;
+ ///}
+
+ using ExplorerTy = MustBeExecutedContextExplorer;
+
+ MustBeExecutedIterator(const MustBeExecutedIterator &Other)
+ : Visited(Other.Visited), Explorer(Other.Explorer),
+ CurInst(Other.CurInst) {}
+
+ MustBeExecutedIterator(MustBeExecutedIterator &&Other)
+ : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
+ CurInst(Other.CurInst) {}
+
+ MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
+ if (this != &Other) {
+ std::swap(Visited, Other.Visited);
+ std::swap(CurInst, Other.CurInst);
+ }
+ return *this;
+ }
+
+ ~MustBeExecutedIterator() {}
+
+ /// Pre- and post-increment operators.
+ ///{
+ MustBeExecutedIterator &operator++() {
+ CurInst = advance();
+ return *this;
+ }
+
+ MustBeExecutedIterator operator++(int) {
+ MustBeExecutedIterator tmp(*this);
+ operator++();
+ return tmp;
+ }
+ ///}
+
+ /// Equality and inequality operators. Note that we ignore the history here.
+ ///{
+ bool operator==(const MustBeExecutedIterator &Other) const {
+ return CurInst == Other.CurInst;
+ }
+
+ bool operator!=(const MustBeExecutedIterator &Other) const {
+ return !(*this == Other);
+ }
+ ///}
+
+ /// Return the underlying instruction.
+ const Instruction *&operator*() { return CurInst; }
+ const Instruction *getCurrentInst() const { return CurInst; }
+
+ /// Return true if \p I was encountered by this iterator already.
+ bool count(const Instruction *I) const { return Visited.count(I); }
+
+private:
+ using VisitedSetTy = DenseSet<const Instruction *>;
+
+ /// Private constructors.
+ MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
+
+ /// Reset the iterator to its initial state pointing at \p I.
+ void reset(const Instruction *I);
+
+ /// Try to advance one of the underlying positions (Head or Tail).
+ ///
+ /// \return The next instruction in the must be executed context, or nullptr
+ /// if none was found.
+ const Instruction *advance();
+
+ /// A set to track the visited instructions in order to deal with endless
+ /// loops and recursion.
+ VisitedSetTy Visited;
+
+ /// A reference to the explorer that created this iterator.
+ ExplorerTy &Explorer;
+
+ /// The instruction we are currently exposing to the user. There is always an
+ /// instruction that we know is executed with the given program point,
+ /// initially the program point itself.
+ const Instruction *CurInst;
+
+ friend struct MustBeExecutedContextExplorer;
+};
+
+/// A "must be executed context" for a given program point PP is the set of
+/// instructions, potentially before and after PP, that are always executed when
+/// PP is reached. The MustBeExecutedContextExplorer offers an interface to
+/// explore
+/// "must be executed contexts" in a module through the use of
+/// MustBeExecutedIterator.
+///
+/// The explorer exposes "must be executed iterators" that traverse the must be
+/// executed context. There is little information sharing between iterators as
+/// the expected use case involves few iterators for "far apart" instructions.
+/// If that changes, we should consider caching more intermediate results.
+struct MustBeExecutedContextExplorer {
+
+ /// In the description of the parameters we use PP to denote a program point
+ /// for which the must be executed context is explored, or put differently,
+ /// for which the MustBeExecutedIterator is created.
+ ///
+ /// \param ExploreInterBlock Flag to indicate if instructions in blocks
+ /// other than the parent of PP should be
+ /// explored.
+ MustBeExecutedContextExplorer(bool ExploreInterBlock)
+ : ExploreInterBlock(ExploreInterBlock), EndIterator(*this, nullptr) {}
+
+ /// Clean up the dynamically allocated iterators.
+ ~MustBeExecutedContextExplorer() {
+ DeleteContainerSeconds(InstructionIteratorMap);
+ }
+
+ /// Iterator-based interface. \see MustBeExecutedIterator.
+ ///{
+ using iterator = MustBeExecutedIterator;
+ using const_iterator = const MustBeExecutedIterator;
+
+ /// Return an iterator to explore the context around \p PP.
+ iterator &begin(const Instruction *PP) {
+ auto *&It = InstructionIteratorMap[PP];
+ if (!It)
+ It = new iterator(*this, PP);
+ return *It;
+ }
+
+ /// Return an iterator to explore the cached context around \p PP.
+ const_iterator &begin(const Instruction *PP) const {
+ return *InstructionIteratorMap.lookup(PP);
+ }
+
+  /// Return a universal end iterator.
+ ///{
+ iterator &end() { return EndIterator; }
+ iterator &end(const Instruction *) { return EndIterator; }
+
+ const_iterator &end() const { return EndIterator; }
+ const_iterator &end(const Instruction *) const { return EndIterator; }
+ ///}
+
+ /// Return an iterator range to explore the context around \p PP.
+ llvm::iterator_range<iterator> range(const Instruction *PP) {
+ return llvm::make_range(begin(PP), end(PP));
+ }
+
+ /// Return an iterator range to explore the cached context around \p PP.
+ llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
+ return llvm::make_range(begin(PP), end(PP));
+ }
+ ///}
+
+ /// Return the next instruction that is guaranteed to be executed after \p PP.
+ ///
+ /// \param It The iterator that is used to traverse the must be
+ /// executed context.
+ /// \param PP The program point for which the next instruction
+ /// that is guaranteed to execute is determined.
+ const Instruction *
+ getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
+ const Instruction *PP);
+
+  /// Parameters that limit the performed exploration. See the constructor for
+ /// their meaning.
+ ///{
+ const bool ExploreInterBlock;
+ ///}
+
+private:
+ /// Map from instructions to associated must be executed iterators.
+ DenseMap<const Instruction *, MustBeExecutedIterator *>
+ InstructionIteratorMap;
+
+ /// A unique end iterator.
+ MustBeExecutedIterator EndIterator;
+};
+
+} // namespace llvm
#endif
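
A minimal exploration sketch, assuming a program point PP of type
const Instruction *:

    MustBeExecutedContextExplorer Explorer(/*ExploreInterBlock=*/true);
    SmallPtrSet<const Instruction *, 16> MustExec;
    // Every instruction visited is known to execute whenever PP is reached.
    for (const Instruction *I : Explorer.range(PP))
      MustExec.insert(I);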
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index d9c97dff8c6e..8562519fa7b1 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -103,6 +103,13 @@ namespace llvm {
//
FunctionPass *createMustExecutePrinter();
+ //===--------------------------------------------------------------------===//
+ //
+ // createMustBeExecutedContextPrinter - This pass prints information about which
+ // instructions are guaranteed to execute together (run with -analyze).
+ //
+ ModulePass *createMustBeExecutedContextPrinter();
+
}
#endif
diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h
index f309d344b8d1..6693e40ccf22 100644
--- a/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -52,6 +52,15 @@ private:
// because the number of profile counts required to reach the hot
// percentile is above a huge threshold.
Optional<bool> HasHugeWorkingSetSize;
+ // True if the working set size of the code is considered large,
+ // because the number of profile counts required to reach the hot
+ // percentile is above a large threshold.
+ Optional<bool> HasLargeWorkingSetSize;
+ // Compute the threshold for a given cutoff.
+ Optional<uint64_t> computeThreshold(int PercentileCutoff);
+ // The map that caches the threshold values. The keys are the percentile
+ // cutoff values and the values are the corresponding threshold values.
+ DenseMap<int, uint64_t> ThresholdCache;
public:
ProfileSummaryInfo(Module &M) : M(M) {}
@@ -96,6 +105,8 @@ public:
bool AllowSynthetic = false);
/// Returns true if the working set size of the code is considered huge.
bool hasHugeWorkingSetSize();
+ /// Returns true if the working set size of the code is considered large.
+ bool hasLargeWorkingSetSize();
/// Returns true if \p F has hot function entry.
bool isFunctionEntryHot(const Function *F);
/// Returns true if \p F contains hot code.
@@ -104,14 +115,26 @@ public:
bool isFunctionEntryCold(const Function *F);
/// Returns true if \p F contains only cold code.
bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
+ /// Returns true if \p F contains hot code with regard to a given hot
+ /// percentile cutoff value.
+ bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff,
+ const Function *F,
+ BlockFrequencyInfo &BFI);
/// Returns true if count \p C is considered hot.
bool isHotCount(uint64_t C);
/// Returns true if count \p C is considered cold.
bool isColdCount(uint64_t C);
+ /// Returns true if count \p C is considered hot with regard to a given
+ /// hot percentile cutoff value.
+ bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C);
/// Returns true if BasicBlock \p BB is considered hot.
bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
/// Returns true if BasicBlock \p BB is considered cold.
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI);
+ /// Returns true if BasicBlock \p BB is considered hot with regard to a given
+ /// hot percentile cutoff value.
+ bool isHotBlockNthPercentile(int PercentileCutoff,
+ const BasicBlock *BB, BlockFrequencyInfo *BFI);
/// Returns true if CallSite \p CS is considered hot.
bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI);
/// Returns true if Callsite \p CS is considered cold.
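
A sketch of gating on a percentile cutoff instead of the fixed hot threshold;
the value 990000 assumes the parts-per-million cutoff scale used by profile
summaries, and enableAggressiveUnrolling is a hypothetical hook:

    // Treat F as hot only relative to the 99th percentile.
    if (PSI->isFunctionHotInCallGraphNthPercentile(/*PercentileCutoff=*/990000,
                                                   &F, BFI))
      enableAggressiveUnrolling(F);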
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index c59c09dd2095..6b5936680c37 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -365,7 +365,7 @@ typename Tr::RegionNodeT *RegionBase<Tr>::getBBNode(BlockT *BB) const {
auto Deconst = const_cast<RegionBase<Tr> *>(this);
typename BBNodeMapT::value_type V = {
BB,
- llvm::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
+ std::make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
at = BBNodeMap.insert(std::move(V)).first;
}
return at->second.get();
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 0bd98ef37e7a..9c55f7a5090f 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -468,6 +468,8 @@ template <> struct DenseMapInfo<ExitLimitQuery> {
/// can't do much with the SCEV objects directly, they must ask this class
/// for services.
class ScalarEvolution {
+ friend class ScalarEvolutionsTest;
+
public:
/// An enum describing the relationship between a SCEV and a loop.
enum LoopDisposition {
@@ -777,10 +779,10 @@ public:
 /// to (i.e. a "conservative over-approximation") of the value returned by
/// getBackedgeTakenCount. If such a value cannot be computed, it returns the
/// SCEVCouldNotCompute object.
- const SCEV *getMaxBackedgeTakenCount(const Loop *L);
+ const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L);
/// Return true if the backedge taken count is either the value returned by
- /// getMaxBackedgeTakenCount or zero.
+ /// getConstantMaxBackedgeTakenCount or zero.
bool isBackedgeTakenCountMaxOrZero(const Loop *L);
/// Return true if the specified loop has an analyzable loop-invariant
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index a519f93216b3..b4d727449fbe 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -77,9 +77,13 @@ namespace llvm {
/// Phis that complete an IV chain. Reuse
DenseSet<AssertingVH<PHINode>> ChainedPhis;
- /// When true, expressions are expanded in "canonical" form. In particular,
- /// addrecs are expanded as arithmetic based on a canonical induction
- /// variable. When false, expression are expanded in a more literal form.
+ /// When true, SCEVExpander tries to expand expressions in "canonical" form.
+ /// When false, expressions are expanded in a more literal form.
+ ///
+ /// In "canonical" form addrecs are expanded as arithmetic based on a
+ /// canonical induction variable. Note that CanonicalMode doesn't guarantee
+ /// that all expressions are expanded in "canonical" form. For some
+ /// expressions literal mode can be preferred.
bool CanonicalMode;
/// When invoked from LSR, the expander is in "strength reduction" mode. The
@@ -275,8 +279,16 @@ namespace llvm {
/// Clear the current insertion point. This is useful if the instruction
/// that had been serving as the insertion point may have been deleted.
- void clearInsertPoint() {
- Builder.ClearInsertionPoint();
+ void clearInsertPoint() { Builder.ClearInsertionPoint(); }
+
+ /// Set location information used by debugging information.
+ void SetCurrentDebugLocation(DebugLoc L) {
+ Builder.SetCurrentDebugLocation(std::move(L));
+ }
+
+ /// Get location information used by debugging information.
+ const DebugLoc &getCurrentDebugLocation() const {
+ return Builder.getCurrentDebugLocation();
}
/// Return true if the specified instruction was inserted by the code
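
The two accessors make it easy to preserve source locations across expansion;
a minimal sketch, assuming a SCEV *S is materialized in place of an
instruction OldInst:

    SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "scevexp");
    // Tag everything the expander emits with the location of the code it
    // replaces so the debug info survives the rewrite.
    Expander.SetCurrentDebugLocation(OldInst->getDebugLoc());
    Value *V = Expander.expandCodeFor(S, OldInst->getType(), OldInst);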
diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h
index 4b5200f5a838..d4b223863c54 100644
--- a/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/include/llvm/Analysis/TargetLibraryInfo.h
@@ -30,11 +30,12 @@ struct VecDesc {
unsigned VectorizationFactor;
};
- enum LibFunc {
+ enum LibFunc : unsigned {
#define TLI_DEFINE_ENUM
#include "llvm/Analysis/TargetLibraryInfo.def"
- NumLibFuncs
+ NumLibFuncs,
+ NotLibFunc
};
/// Implementation of the target library information.
@@ -48,7 +49,7 @@ class TargetLibraryInfoImpl {
unsigned char AvailableArray[(NumLibFuncs+3)/4];
llvm::DenseMap<unsigned, std::string> CustomNames;
- static StringRef const StandardNames[NumLibFuncs];
+ static StringLiteral const StandardNames[NumLibFuncs];
bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
enum AvailabilityState {
@@ -359,7 +360,6 @@ public:
TargetLibraryAnalysis(TargetLibraryInfoImpl PresetInfoImpl)
: PresetInfoImpl(std::move(PresetInfoImpl)) {}
- TargetLibraryInfo run(Module &M, ModuleAnalysisManager &);
TargetLibraryInfo run(Function &F, FunctionAnalysisManager &);
private:
@@ -385,8 +385,13 @@ public:
explicit TargetLibraryInfoWrapperPass(const Triple &T);
explicit TargetLibraryInfoWrapperPass(const TargetLibraryInfoImpl &TLI);
- TargetLibraryInfo &getTLI() { return TLI; }
- const TargetLibraryInfo &getTLI() const { return TLI; }
+ TargetLibraryInfo &getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) {
+ return TLI;
+ }
+ const TargetLibraryInfo &
+ getTLI(const Function &F LLVM_ATTRIBUTE_UNUSED) const {
+ return TLI;
+ }
};
} // end namespace llvm
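
Callers of the wrapper pass now name the function they query for; a
representative sketch inside a legacy function pass:

    bool runOnFunction(Function &F) override {
      const TargetLibraryInfo &TLI =
          getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
      LibFunc LF;
      // Per-function TLI stays correct once availability can vary between
      // functions (e.g., with per-function target features).
      bool HasMemcpy = TLI.getLibFunc("memcpy", LF) && TLI.has(LF);
      (void)HasMemcpy; // e.g., feed into a transform decision
      return false;
    }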
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 7574b811bc1c..d6fa88411654 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -368,6 +368,20 @@ public:
/// optimize away.
unsigned getFlatAddressSpace() const;
+ /// Return any intrinsic address operand indexes which may be rewritten if
+ /// they use a flat address space pointer.
+ ///
+ /// \returns true if the intrinsic was handled.
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const;
+
+ /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
+ /// NewV, which has a different address space. This should happen for every
+ /// operand index that collectFlatAddressOperands returned for the intrinsic.
+  /// \returns true if the intrinsic was handled.
+ bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const;
+
/// Test whether calls to a function lower to actual program function
/// calls.
///
@@ -469,12 +483,17 @@ public:
bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
- /// Allow peeling off loop iterations for loops with low dynamic tripcount.
+ /// Allow peeling off loop iterations.
bool AllowPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;
+    /// Allow peeling based on profile. Used to enable peeling off all
+    /// iterations based on the provided profile.
+    /// If the value is true, the peeling cost model can decide to peel only
+    /// some iterations, and in that case it will set this to false.
+ bool PeelProfiledIterations;
/// Threshold for unroll and jam, for inner loop size. The 'Threshold'
/// value above is used during unroll and jam for the outer loop size.
/// This value is used in the same manner to limit the size of the inner
@@ -555,15 +574,15 @@ public:
/// modes that operate across loop iterations.
bool shouldFavorBackedgeIndex(const Loop *L) const;
- /// Return true if the target supports masked load.
- bool isLegalMaskedStore(Type *DataType) const;
/// Return true if the target supports masked store.
- bool isLegalMaskedLoad(Type *DataType) const;
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const;
+ /// Return true if the target supports masked load.
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const;
/// Return true if the target supports nontemporal store.
- bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
+ bool isLegalNTStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal load.
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
+ bool isLegalNTLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType) const;
@@ -622,12 +641,6 @@ public:
/// Return true if this type is legal.
bool isTypeLegal(Type *Ty) const;
- /// Returns the target's jmp_buf alignment in bytes.
- unsigned getJumpBufAlignment() const;
-
- /// Returns the target's jmp_buf size in bytes.
- unsigned getJumpBufSize() const;
-
/// Return true if switches should be turned into lookup tables for the
/// target.
bool shouldBuildLookupTables() const;
@@ -775,10 +788,23 @@ public:
/// Additional properties of an operand's values.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
- /// \return The number of scalar or vector registers that the target has.
- /// If 'Vectors' is true, it returns the number of vector registers. If it is
- /// set to false, it returns the number of scalar registers.
- unsigned getNumberOfRegisters(bool Vector) const;
+ /// \return the number of registers in the target-provided register class.
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
+
+ /// \return the target-provided register class ID for the provided type,
+  /// accounting for type promotion and other type-legalization techniques
+  /// that the target might apply. However, it specifically does not account
+  /// for the scalarization or splitting of vector types. Should a vector type
+  /// require scalarization or splitting into multiple underlying vector
+  /// registers, that type should be mapped to a register class containing no
+  /// registers. Specifically, this is designed to provide a simple, high-level
+  /// view of the register allocation later performed by the backend. These
+  /// register classes don't necessarily map onto the register classes used by
+  /// the backend.
+ /// FIXME: It's not currently possible to determine how many registers
+ /// are used by the provided type.
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+
+ /// \return the target-provided register class name
+ const char* getRegisterClassName(unsigned ClassID) const;
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
@@ -824,18 +850,20 @@ public:
/// \return The associativity of the cache level, if available.
llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
- /// \return How much before a load we should place the prefetch instruction.
- /// This is currently measured in number of instructions.
+ /// \return How much before a load we should place the prefetch
+ /// instruction. This is currently measured in number of
+ /// instructions.
unsigned getPrefetchDistance() const;
- /// \return Some HW prefetchers can handle accesses up to a certain constant
- /// stride. This is the minimum stride in bytes where it makes sense to start
- /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
+ /// \return Some HW prefetchers can handle accesses up to a certain
+ /// constant stride. This is the minimum stride in bytes where it
+ /// makes sense to start adding SW prefetches. The default is 1,
+ /// i.e. prefetch with any stride.
unsigned getMinPrefetchStride() const;
- /// \return The maximum number of iterations to prefetch ahead. If the
- /// required number of iterations is more than this number, no prefetching is
- /// performed.
+ /// \return The maximum number of iterations to prefetch ahead. If
+ /// the required number of iterations is more than this number, no
+ /// prefetching is performed.
unsigned getMaxPrefetchIterationsAhead() const;
/// \return The maximum interleave factor that any transform should try to
@@ -1155,6 +1183,10 @@ public:
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
virtual unsigned getFlatAddressSpace() = 0;
+ virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const = 0;
+ virtual bool rewriteIntrinsicWithAddressSpace(
+ IntrinsicInst *II, Value *OldV, Value *NewV) const = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
@@ -1177,10 +1209,10 @@ public:
TargetLibraryInfo *LibInfo) = 0;
virtual bool shouldFavorPostInc() const = 0;
virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
- virtual bool isLegalMaskedStore(Type *DataType) = 0;
- virtual bool isLegalMaskedLoad(Type *DataType) = 0;
- virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
- virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
+ virtual bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) = 0;
+ virtual bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) = 0;
+ virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
+ virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedScatter(Type *DataType) = 0;
virtual bool isLegalMaskedGather(Type *DataType) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
@@ -1196,8 +1228,6 @@ public:
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
- virtual unsigned getJumpBufAlignment() = 0;
- virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
@@ -1228,19 +1258,35 @@ public:
Type *Ty) = 0;
virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
- virtual unsigned getNumberOfRegisters(bool Vector) = 0;
+ virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
+ virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0;
+ virtual const char* getRegisterClassName(unsigned ClassID) const = 0;
virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() = 0;
virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
- virtual unsigned getCacheLineSize() = 0;
- virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
- virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
- virtual unsigned getPrefetchDistance() = 0;
- virtual unsigned getMinPrefetchStride() = 0;
- virtual unsigned getMaxPrefetchIterationsAhead() = 0;
+ virtual unsigned getCacheLineSize() const = 0;
+ virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
+ virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
+
+ /// \return How much before a load we should place the prefetch
+ /// instruction. This is currently measured in number of
+ /// instructions.
+ virtual unsigned getPrefetchDistance() const = 0;
+
+ /// \return Some HW prefetchers can handle accesses up to a certain
+ /// constant stride. This is the minimum stride in bytes where it
+ /// makes sense to start adding SW prefetches. The default is 1,
+ /// i.e. prefetch with any stride.
+ virtual unsigned getMinPrefetchStride() const = 0;
+
+ /// \return The maximum number of iterations to prefetch ahead. If
+ /// the required number of iterations is more than this number, no
+ /// prefetching is performed.
+ virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
+
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -1395,6 +1441,16 @@ public:
return Impl.getFlatAddressSpace();
}
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const override {
+ return Impl.collectFlatAddressOperands(OpIndexes, IID);
+ }
+
+ bool rewriteIntrinsicWithAddressSpace(
+ IntrinsicInst *II, Value *OldV, Value *NewV) const override {
+ return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
+ }
+
bool isLoweredToCall(const Function *F) override {
return Impl.isLoweredToCall(F);
}
@@ -1440,16 +1496,16 @@ public:
bool shouldFavorBackedgeIndex(const Loop *L) const override {
return Impl.shouldFavorBackedgeIndex(L);
}
- bool isLegalMaskedStore(Type *DataType) override {
- return Impl.isLegalMaskedStore(DataType);
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) override {
+ return Impl.isLegalMaskedStore(DataType, Alignment);
}
- bool isLegalMaskedLoad(Type *DataType) override {
- return Impl.isLegalMaskedLoad(DataType);
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) override {
+ return Impl.isLegalMaskedLoad(DataType, Alignment);
}
- bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
+ bool isLegalNTStore(Type *DataType, Align Alignment) override {
return Impl.isLegalNTStore(DataType, Alignment);
}
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
+ bool isLegalNTLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);
}
bool isLegalMaskedScatter(Type *DataType) override {
@@ -1490,8 +1546,6 @@ public:
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
- unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
- unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
bool shouldBuildLookupTables() override {
return Impl.shouldBuildLookupTables();
}
@@ -1563,8 +1617,14 @@ public:
Type *Ty) override {
return Impl.getIntImmCost(IID, Idx, Imm, Ty);
}
- unsigned getNumberOfRegisters(bool Vector) override {
- return Impl.getNumberOfRegisters(Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const override {
+ return Impl.getNumberOfRegisters(ClassID);
+ }
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override {
+ return Impl.getRegisterClassForType(Vector, Ty);
+ }
+ const char* getRegisterClassName(unsigned ClassID) const override {
+ return Impl.getRegisterClassName(ClassID);
}
unsigned getRegisterBitWidth(bool Vector) const override {
return Impl.getRegisterBitWidth(Vector);
@@ -1583,22 +1643,36 @@ public:
return Impl.shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
- unsigned getCacheLineSize() override {
+ unsigned getCacheLineSize() const override {
return Impl.getCacheLineSize();
}
- llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
+ llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const override {
return Impl.getCacheSize(Level);
}
- llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
+ llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
return Impl.getCacheAssociativity(Level);
}
- unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
- unsigned getMinPrefetchStride() override {
+
+ /// Return the preferred prefetch distance in terms of instructions.
+ ///
+ unsigned getPrefetchDistance() const override {
+ return Impl.getPrefetchDistance();
+ }
+
+ /// Return the minimum stride necessary to trigger software
+ /// prefetching.
+ ///
+ unsigned getMinPrefetchStride() const override {
return Impl.getMinPrefetchStride();
}
- unsigned getMaxPrefetchIterationsAhead() override {
+
+ /// Return the maximum prefetch distance in terms of loop
+ /// iterations.
+ ///
+ unsigned getMaxPrefetchIterationsAhead() const override {
return Impl.getMaxPrefetchIterationsAhead();
}
+
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
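
A register-pressure sketch against the new class-based API, assuming a vector
type VecTy and a TargetTransformInfo TTI:

    unsigned ClassID = TTI.getRegisterClassForType(/*Vector=*/true, VecTy);
    unsigned Budget = TTI.getNumberOfRegisters(ClassID);
    LLVM_DEBUG(dbgs() << "budget for " << TTI.getRegisterClassName(ClassID)
                      << ": " << Budget << " registers\n");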
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index b99e1eb9adf0..a431fa0d458b 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -156,6 +156,16 @@ public:
return -1;
}
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const {
+ return false;
+ }
+
+ bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const {
+ return false;
+ }
+
bool isLoweredToCall(const Function *F) {
assert(F && "A concrete function must be provided to this routine.");
@@ -233,18 +243,18 @@ public:
bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
- bool isLegalMaskedStore(Type *DataType) { return false; }
+ bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; }
- bool isLegalMaskedLoad(Type *DataType) { return false; }
+ bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; }
- bool isLegalNTStore(Type *DataType, unsigned Alignment) {
+ bool isLegalNTStore(Type *DataType, Align Alignment) {
// By default, assume nontemporal memory stores are available for stores
// that are aligned and have a size that is a power of 2.
unsigned DataSize = DL.getTypeStoreSize(DataType);
return Alignment >= DataSize && isPowerOf2_32(DataSize);
}
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) {
+ bool isLegalNTLoad(Type *DataType, Align Alignment) {
// By default, assume nontemporal memory loads are available for loads that
// are aligned and have a size that is a power of 2.
unsigned DataSize = DL.getTypeStoreSize(DataType);
@@ -284,10 +294,6 @@ public:
bool isTypeLegal(Type *Ty) { return false; }
- unsigned getJumpBufAlignment() { return 0; }
-
- unsigned getJumpBufSize() { return 0; }
-
bool shouldBuildLookupTables() { return true; }
bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
@@ -348,7 +354,20 @@ public:
return TTI::TCC_Free;
}
- unsigned getNumberOfRegisters(bool Vector) { return 8; }
+ unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
+
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
+ return Vector ? 1 : 0;
+  }
+
+ const char* getRegisterClassName(unsigned ClassID) const {
+ switch (ClassID) {
+ default:
+ return "Generic::Unknown Register Class";
+ case 0: return "Generic::ScalarRC";
+ case 1: return "Generic::VectorRC";
+ }
+ }
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
@@ -365,21 +384,20 @@ public:
return false;
}
- unsigned getCacheLineSize() { return 0; }
+ unsigned getCacheLineSize() const { return 0; }
- llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
+ llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) const {
switch (Level) {
case TargetTransformInfo::CacheLevel::L1D:
LLVM_FALLTHROUGH;
case TargetTransformInfo::CacheLevel::L2D:
return llvm::Optional<unsigned>();
}
-
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
llvm::Optional<unsigned> getCacheAssociativity(
- TargetTransformInfo::CacheLevel Level) {
+ TargetTransformInfo::CacheLevel Level) const {
switch (Level) {
case TargetTransformInfo::CacheLevel::L1D:
LLVM_FALLTHROUGH;
@@ -390,11 +408,9 @@ public:
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
- unsigned getPrefetchDistance() { return 0; }
-
- unsigned getMinPrefetchStride() { return 1; }
-
- unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
+ unsigned getPrefetchDistance() const { return 0; }
+ unsigned getMinPrefetchStride() const { return 1; }
+ unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
@@ -830,6 +846,9 @@ public:
if (isa<PHINode>(U))
return TTI::TCC_Free; // Model all PHI nodes as free.
+ if (isa<ExtractValueInst>(U))
+ return TTI::TCC_Free; // Model all ExtractValue nodes as free.
+
// Static alloca doesn't generate target instructions.
if (auto *A = dyn_cast<AllocaInst>(U))
if (A->isStaticAlloca())
diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h
index 82cf8efeea54..43ce26147c2e 100644
--- a/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/include/llvm/Analysis/TypeMetadataUtils.h
@@ -50,6 +50,8 @@ void findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<Instruction *> &LoadedPtrs,
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
const CallInst *CI, DominatorTree &DT);
+
+Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M);
}
#endif
diff --git a/include/llvm/Analysis/Utils/Local.h b/include/llvm/Analysis/Utils/Local.h
index acbdf5dca32c..a63bcec9bc41 100644
--- a/include/llvm/Analysis/Utils/Local.h
+++ b/include/llvm/Analysis/Utils/Local.h
@@ -32,7 +32,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
Value *Result = Constant::getNullValue(IntPtrTy);
// If the GEP is inbounds, we know that none of the addressing operations will
- // overflow in an unsigned sense.
+ // overflow in a signed sense.
bool isInBounds = GEPOp->isInBounds() && !NoAssumptions;
// Build a mask for high order bits.
@@ -51,10 +51,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = GTI.getStructTypeOrNull()) {
- if (OpC->getType()->isVectorTy())
- OpC = OpC->getSplatValue();
-
- uint64_t OpValue = cast<ConstantInt>(OpC)->getZExtValue();
+ uint64_t OpValue = OpC->getUniqueInteger().getZExtValue();
Size = DL.getStructLayout(STy)->getElementOffset(OpValue);
if (Size)
@@ -63,20 +60,31 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
continue;
}
+ // Splat the constant if needed.
+ if (IntPtrTy->isVectorTy() && !OpC->getType()->isVectorTy())
+ OpC = ConstantVector::getSplat(IntPtrTy->getVectorNumElements(), OpC);
+
Constant *Scale = ConstantInt::get(IntPtrTy, Size);
Constant *OC = ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/);
- Scale = ConstantExpr::getMul(OC, Scale, isInBounds/*NUW*/);
+ Scale =
+ ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
// Emit an add instruction.
Result = Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");
continue;
}
+
+ // Splat the index if needed.
+ if (IntPtrTy->isVectorTy() && !Op->getType()->isVectorTy())
+ Op = Builder->CreateVectorSplat(IntPtrTy->getVectorNumElements(), Op);
+
// Convert to correct type.
if (Op->getType() != IntPtrTy)
Op = Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");
if (Size != 1) {
// We'll let instcombine(mul) convert this to a shl if possible.
Op = Builder->CreateMul(Op, ConstantInt::get(IntPtrTy, Size),
- GEP->getName()+".idx", isInBounds /*NUW*/);
+ GEP->getName() + ".idx", false /*NUW*/,
+ isInBounds /*NSW*/);
}
// Emit an add instruction.
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index fa7e0e0eef7e..33b064fcf9d2 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -242,19 +242,21 @@ class Value;
/// This is a wrapper around Value::stripAndAccumulateConstantOffsets that
/// creates and later unpacks the required APInt.
inline Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout &DL) {
+ const DataLayout &DL,
+ bool AllowNonInbounds = true) {
APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Value *Base =
- Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt,
- /* AllowNonInbounds */ true);
+ Ptr->stripAndAccumulateConstantOffsets(DL, OffsetAPInt, AllowNonInbounds);
+
Offset = OffsetAPInt.getSExtValue();
return Base;
}
- inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr,
- int64_t &Offset,
- const DataLayout &DL) {
- return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
- DL);
+ inline const Value *
+ GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
+ const DataLayout &DL,
+ bool AllowNonInbounds = true) {
+ return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset, DL,
+ AllowNonInbounds);
}
/// Returns true if the GEP is based on a pointer to a string (array of
@@ -307,20 +309,26 @@ class Value;
uint64_t GetStringLength(const Value *V, unsigned CharSize = 8);
/// This function returns call pointer argument that is considered the same by
- /// aliasing rules. You CAN'T use it to replace one value with another.
- const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call);
- inline Value *getArgumentAliasingToReturnedPointer(CallBase *Call) {
+ /// aliasing rules. You CAN'T use it to replace one value with another. If
+ /// \p MustPreserveNullness is true, the call must preserve the nullness of
+ /// the pointer.
+ const Value *getArgumentAliasingToReturnedPointer(const CallBase *Call,
+ bool MustPreserveNullness);
+ inline Value *
+ getArgumentAliasingToReturnedPointer(CallBase *Call,
+ bool MustPreserveNullness) {
return const_cast<Value *>(getArgumentAliasingToReturnedPointer(
- const_cast<const CallBase *>(Call)));
+ const_cast<const CallBase *>(Call), MustPreserveNullness));
}
- // {launder,strip}.invariant.group returns pointer that aliases its argument,
- // and it only captures pointer by returning it.
- // These intrinsics are not marked as nocapture, because returning is
- // considered as capture. The arguments are not marked as returned neither,
- // because it would make it useless.
+ /// {launder,strip}.invariant.group returns pointer that aliases its argument,
+ /// and it only captures pointer by returning it.
+ /// These intrinsics are not marked as nocapture, because returning is
+ /// considered as capture. The arguments are not marked as returned neither,
+ /// because it would make it useless. If \p MustPreserveNullness is true,
+ /// the intrinsic must preserve the nullness of the pointer.
bool isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
- const CallBase *Call);
+ const CallBase *Call, bool MustPreserveNullness);
/// This method strips off any GEP address adjustments and pointer casts from
/// the specified value, returning the original object being addressed. Note
@@ -376,6 +384,13 @@ class Value;
/// Return true if the only users of this pointer are lifetime markers.
bool onlyUsedByLifetimeMarkers(const Value *V);
+ /// Return true if speculation of the given load must be suppressed to avoid
+ /// ordering or interfering with an active sanitizer. If not suppressed,
+ /// dereferenceability and alignment must be proven separately. Note: This
+ /// is only needed for raw reasoning; if you use the interface below
+ /// (isSafeToSpeculativelyExecute), this is handled internally.
+ bool mustSuppressSpeculation(const LoadInst &LI);
+
/// Return true if the instruction does not have any effects besides
/// calculating the result and does not have undefined behavior.
///
@@ -605,12 +620,12 @@ class Value;
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp = nullptr,
unsigned Depth = 0);
+
inline SelectPatternResult
- matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS,
- Instruction::CastOps *CastOp = nullptr) {
- Value *L = const_cast<Value*>(LHS);
- Value *R = const_cast<Value*>(RHS);
- auto Result = matchSelectPattern(const_cast<Value*>(V), L, R);
+ matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS) {
+ Value *L = const_cast<Value *>(LHS);
+ Value *R = const_cast<Value *>(RHS);
+ auto Result = matchSelectPattern(const_cast<Value *>(V), L, R);
LHS = L;
RHS = R;
return Result;
@@ -654,6 +669,12 @@ class Value;
Optional<bool> isImpliedByDomCondition(const Value *Cond,
const Instruction *ContextI,
const DataLayout &DL);
+
+  /// If Ptr2 is provably equal to Ptr1 plus a constant offset, return that
+  /// offset. For example, Ptr1 might be &A[42], and Ptr2 might be &A[40]; with
+  /// 4-byte elements the offset would be -8.
+ Optional<int64_t> isPointerOffset(const Value *Ptr1, const Value *Ptr2,
+ const DataLayout &DL);
} // end namespace llvm
#endif // LLVM_ANALYSIS_VALUETRACKING_H
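
A sketch of the new helper, assuming two pointers into the same array and a
hypothetical consumer:

    // With int A[64]: Ptr1 = &A[42] and Ptr2 = &A[40] give Off == -8, i.e.
    // Ptr2 == Ptr1 + Off in bytes.
    if (Optional<int64_t> Off = isPointerOffset(Ptr1, Ptr2, DL))
      mergeAdjacentAccesses(*Off);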
diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h
index d93d2bc4570b..4a61c2bc35c7 100644
--- a/include/llvm/Analysis/VectorUtils.h
+++ b/include/llvm/Analysis/VectorUtils.h
@@ -15,18 +15,129 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/CheckedArithmetic.h"
namespace llvm {
+/// Describes the type of Parameters
+enum class VFParamKind {
+ Vector, // No semantic information.
+ OMP_Linear, // declare simd linear(i)
+ OMP_LinearRef, // declare simd linear(ref(i))
+ OMP_LinearVal, // declare simd linear(val(i))
+ OMP_LinearUVal, // declare simd linear(uval(i))
+ OMP_LinearPos, // declare simd linear(i:c) uniform(c)
+ OMP_LinearValPos, // declare simd linear(val(i:c)) uniform(c)
+ OMP_LinearRefPos, // declare simd linear(ref(i:c)) uniform(c)
+  OMP_LinearUValPos,  // declare simd linear(uval(i:c)) uniform(c)
+ OMP_Uniform, // declare simd uniform(i)
+ GlobalPredicate, // Global logical predicate that acts on all lanes
+ // of the input and output mask concurrently. For
+ // example, it is implied by the `M` token in the
+ // Vector Function ABI mangled name.
+ Unknown
+};
+
+/// Describes the type of Instruction Set Architecture
+enum class VFISAKind {
+ AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
+ SVE, // AArch64 Scalable Vector Extension
+ SSE, // x86 SSE
+ AVX, // x86 AVX
+ AVX2, // x86 AVX2
+ AVX512, // x86 AVX512
+ Unknown // Unknown ISA
+};
+
+/// Encapsulates information needed to describe a parameter.
+///
+/// The description of the parameter is not linked directly to
+/// OpenMP or any other vector function description. This structure
+/// is extendible to handle other paradigms that describe vector
+/// functions and their parameters.
+struct VFParameter {
+ unsigned ParamPos; // Parameter Position in Scalar Function.
+ VFParamKind ParamKind; // Kind of Parameter.
+ int LinearStepOrPos = 0; // Step or Position of the Parameter.
+  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.
+
+ // Comparison operator.
+ bool operator==(const VFParameter &Other) const {
+ return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
+ std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
+ Other.Alignment);
+ }
+};
+
+/// Contains the information about the kind of vectorization
+/// available.
+///
+/// This object is independent of the paradigm used to
+/// represent vector functions. In particular, it is not attached to
+/// any target-specific ABI.
+struct VFShape {
+ unsigned VF; // Vectorization factor.
+  bool IsScalable; // True if the vectorization factor is scalable.
+ VFISAKind ISA; // Instruction Set Architecture.
+  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
+ // Comparison operator.
+ bool operator==(const VFShape &Other) const {
+ return std::tie(VF, IsScalable, ISA, Parameters) ==
+ std::tie(Other.VF, Other.IsScalable, Other.ISA, Other.Parameters);
+ }
+};
+
+/// Holds the VFShape for a specific scalar to vector function mapping.
+struct VFInfo {
+ VFShape Shape; // Classification of the vector function.
+ StringRef ScalarName; // Scalar Function Name.
+  StringRef VectorName; // Vector Function Name associated with this VFInfo.
+
+ // Comparison operator.
+ bool operator==(const VFInfo &Other) const {
+ return std::tie(Shape, ScalarName, VectorName) ==
+           std::tie(Other.Shape, Other.ScalarName, Other.VectorName);
+ }
+};
+
+namespace VFABI {
+/// Function to construct a VFInfo out of a mangled name in the
+/// following format:
+///
+/// <VFABI_name>[(<redirection>)]
+///
+/// where <VFABI_name> is the name of the vector function, mangled according
+/// to the rules described in the Vector Function ABI of the target vector
+/// extension (or <isa> from now on). The <VFABI_name> is in the following
+/// format:
+///
+/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
+///
+/// This method supports the demangling rules for the following <isa>:
+///
+/// * AArch64: https://developer.arm.com/docs/101129/latest
+///
+/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
+/// https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
+///
+/// \param MangledName input string in the format
+/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
+Optional<VFInfo> tryDemangleForVFABI(StringRef MangledName);
+
+/// Retrieve the `VFParamKind` from a string token.
+VFParamKind getVFParamKindFromString(const StringRef Token);
+} // end namespace VFABI
+
template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class Loop;
class ScalarEvolution;
+class TargetLibraryInfo;
class TargetTransformInfo;
class Type;
class Value;
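A worked example of the demangler may be useful here (a sketch, under the assumption that the AArch64 AdvSIMD mapping "_ZGVnN2v_sin" resolves as the rules above describe: <isa> 'n' = AdvSIMD, <mask> 'N' = unmasked, <vlen> = 2, a single 'v' vector parameter, scalar name "sin"):

    if (Optional<VFInfo> Info = VFABI::tryDemangleForVFABI("_ZGVnN2v_sin")) {
      const VFShape &Shape = Info->Shape;
      assert(Shape.VF == 2 && !Shape.IsScalable);
      assert(Shape.ISA == VFISAKind::AdvancedSIMD);
      assert(Shape.Parameters.size() == 1 &&
             Shape.Parameters[0].ParamKind == VFParamKind::Vector);
      assert(Info->ScalarName == "sin");
    }
    // Individual <parameters> tokens map onto VFParamKind:
    VFParamKind K = VFABI::getVFParamKindFromString("v"); // VFParamKind::Vector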
@@ -270,13 +381,12 @@ APInt possiblyDemandedEltsInMask(Value *Mask);
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
- InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
- : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
-
- InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
- : Align(Align), InsertPos(Instr) {
- assert(Align && "The alignment should be non-zero");
+ InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
+ : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
+ InsertPos(nullptr) {}
+ InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
+ : Alignment(Alignment), InsertPos(Instr) {
Factor = std::abs(Stride);
assert(Factor > 1 && "Invalid interleave factor");
@@ -286,7 +396,7 @@ public:
bool isReverse() const { return Reverse; }
uint32_t getFactor() const { return Factor; }
- uint32_t getAlignment() const { return Align; }
+ uint32_t getAlignment() const { return Alignment.value(); }
uint32_t getNumMembers() const { return Members.size(); }
/// Try to insert a new member \p Instr with index \p Index and
@@ -294,9 +404,7 @@ public:
/// negative if it is the new leader.
///
/// \returns false if the instruction doesn't belong to the group.
- bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
- assert(NewAlign && "The new member's alignment should be non-zero");
-
+ bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
// Make sure the key fits in an int32_t.
Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
if (!MaybeKey)
@@ -328,7 +436,7 @@ public:
}
// It's always safe to select the minimum alignment.
- Align = std::min(Align, NewAlign);
+ Alignment = std::min(Alignment, NewAlign);
Members[Key] = Instr;
return true;
}
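A brief sketch of the updated interface (Leader and Member are hypothetical instructions; Align's invariant subsumes the deleted non-zero asserts):

    // Factor is derived from the stride; the leader defines the insert point.
    InterleaveGroup<Instruction> Group(Leader, /*Stride=*/3, Align(4));
    // insertMember rejects duplicate indices and int32_t key overflow.
    if (!Group.insertMember(Member, /*Index=*/1, Align(4)))
      ; // Member does not belong to this group.
    // The group alignment is the minimum over all inserted members.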
@@ -387,7 +495,7 @@ public:
private:
uint32_t Factor; // Interleave Factor.
bool Reverse;
- uint32_t Align;
+ Align Alignment;
DenseMap<int32_t, InstTy *> Members;
int32_t SmallestKey = 0;
int32_t LargestKey = 0;
@@ -504,8 +612,8 @@ private:
struct StrideDescriptor {
StrideDescriptor() = default;
StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
- unsigned Align)
- : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {}
+ Align Alignment)
+ : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}
// The access's stride. It is negative for a reverse access.
int64_t Stride = 0;
@@ -517,7 +625,7 @@ private:
uint64_t Size = 0;
// The alignment of this access.
- unsigned Align = 0;
+ Align Alignment;
};
/// A type for holding instructions and their stride descriptors.
@@ -528,11 +636,11 @@ private:
///
/// \returns the newly created interleave group.
InterleaveGroup<Instruction> *
- createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) {
+ createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
assert(!InterleaveGroupMap.count(Instr) &&
"Already in an interleaved access group");
InterleaveGroupMap[Instr] =
- new InterleaveGroup<Instruction>(Instr, Stride, Align);
+ new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
InterleaveGroups.insert(InterleaveGroupMap[Instr]);
return InterleaveGroupMap[Instr];
}
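The common thread of these hunks: raw unsigned alignments become llvm::Align, whose constructor enforces a non-zero power of two, which is why the scattered "alignment should be non-zero" asserts could be dropped. A minimal illustration (assumed, not from the patch):

    Align A(16);               // OK: a non-zero power of two.
    uint32_t Raw = A.value();  // 16, as getAlignment() above now returns.
    // Align B(0);             // Would assert at construction time.
    A = std::min(A, Align(8)); // Taking the minimum is always safe.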