Diffstat (limited to 'llvm/include')
-rw-r--r--  llvm/include/llvm-c/Core.h | 11
-rw-r--r--  llvm/include/llvm/ADT/APInt.h | 4
-rw-r--r--  llvm/include/llvm/ADT/SCCIterator.h | 133
-rw-r--r--  llvm/include/llvm/ADT/STLExtras.h | 31
-rw-r--r--  llvm/include/llvm/Analysis/IVDescriptors.h | 8
-rw-r--r--  llvm/include/llvm/Analysis/Lint.h | 9
-rw-r--r--  llvm/include/llvm/Analysis/ScalarEvolution.h | 29
-rw-r--r--  llvm/include/llvm/Analysis/TargetLibraryInfo.def | 6
-rw-r--r--  llvm/include/llvm/Analysis/VectorUtils.h | 2
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELF.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/CommandFlags.h | 5
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 49
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h | 42
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 12
-rw-r--r--  llvm/include/llvm/CodeGen/MachineFunction.h | 3
-rw-r--r--  llvm/include/llvm/CodeGen/TargetFrameLowering.h | 14
-rw-r--r--  llvm/include/llvm/CodeGen/TargetInstrInfo.h | 6
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h | 65
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h | 5
-rw-r--r--  llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h | 5
-rw-r--r--  llvm/include/llvm/ExecutionEngine/Orc/Core.h | 175
-rw-r--r--  llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 4
-rw-r--r--  llvm/include/llvm/IR/IRBuilder.h | 125
-rw-r--r--  llvm/include/llvm/IR/Instructions.h | 18
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsPowerPC.td | 18
-rw-r--r--  llvm/include/llvm/IR/Operator.h | 8
-rw-r--r--  llvm/include/llvm/IR/PatternMatch.h | 25
-rw-r--r--  llvm/include/llvm/IR/Type.h | 2
-rw-r--r--  llvm/include/llvm/IR/VPIntrinsics.def | 156
-rw-r--r--  llvm/include/llvm/IR/Verifier.h | 6
-rw-r--r--  llvm/include/llvm/MCA/HardwareUnits/Scheduler.h | 7
-rw-r--r--  llvm/include/llvm/ObjectYAML/MachOYAML.h | 1
-rw-r--r--  llvm/include/llvm/ProfileData/MemProfData.inc | 61
-rw-r--r--  llvm/include/llvm/ProfileData/RawMemProfReader.h | 43
-rw-r--r--  llvm/include/llvm/Support/AArch64TargetParser.def | 3
-rw-r--r--  llvm/include/llvm/Support/AArch64TargetParser.h | 9
-rw-r--r--  llvm/include/llvm/Support/ARMAttributeParser.h | 4
-rw-r--r--  llvm/include/llvm/Support/ARMBuildAttributes.h | 24
-rw-r--r--  llvm/include/llvm/Support/ARMTargetParser.def | 1
-rw-r--r--  llvm/include/llvm/Support/ARMTargetParser.h | 2
-rw-r--r--  llvm/include/llvm/Support/GenericDomTree.h | 16
-rw-r--r--  llvm/include/llvm/Support/HTTPClient.h | 113
-rw-r--r--  llvm/include/llvm/Support/Mutex.h | 4
-rw-r--r--  llvm/include/llvm/Support/RWMutex.h | 8
-rw-r--r--  llvm/include/llvm/Support/TargetParser.h | 12
-rw-r--r--  llvm/include/llvm/Support/ThreadPool.h | 70
-rw-r--r--  llvm/include/llvm/Target/GlobalISel/Combine.td | 96
-rw-r--r--  llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h | 104
-rw-r--r--  llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h | 2
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h | 1
-rw-r--r--  llvm/include/llvm/Transforms/Utils/Local.h | 5
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SSAUpdater.h | 4
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SampleProfileInference.h | 284
-rw-r--r--  llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h | 162
54 files changed, 1597 insertions(+), 422 deletions(-)
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index d170eff17951..f2183ff52bfb 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -2377,10 +2377,21 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit);
*
* @{
*/
+
+/** Deprecated: Use LLVMAddAlias2 instead. */
LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
const char *Name);
/**
+ * Add a GlobalAlias with the given value type, address space and aliasee.
+ *
+ * @see llvm::GlobalAlias::create()
+ */
+LLVMValueRef LLVMAddAlias2(LLVMModuleRef M, LLVMTypeRef ValueTy,
+ unsigned AddrSpace, LLVMValueRef Aliasee,
+ const char *Name);
+
+/**
* Obtain a GlobalAlias value from a Module by its name.
*
* The returned value corresponds to a llvm::GlobalAlias value.
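A minimal migration sketch for the C API change above; the helper name is illustrative, and LLVMGlobalGetValueType is used on the assumption that the aliasee is a global:

#include "llvm-c/Core.h"

// Hypothetical helper: create an alias for a global using the new entry point.
static LLVMValueRef makeAliasFor(LLVMModuleRef M, LLVMValueRef Global) {
  // Previously: LLVMAddAlias(M, LLVMTypeOf(Global), Global, "g_alias");
  // LLVMAddAlias2 takes the aliasee's *value* type rather than its pointer
  // type, plus an explicit address space (0 assumed to be the default here).
  LLVMTypeRef ValueTy = LLVMGlobalGetValueType(Global);
  return LLVMAddAlias2(M, ValueTy, /*AddrSpace=*/0, Global, "g_alias");
}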
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 595cd94b6b8f..c2660502a419 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1458,10 +1458,8 @@ public:
/// uint64_t. The bitwidth must be <= 64 or the value must fit within a
/// uint64_t. Otherwise an assertion will result.
uint64_t getZExtValue() const {
- if (isSingleWord()) {
- assert(BitWidth && "zero width values not allowed");
+ if (isSingleWord())
return U.VAL;
- }
assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
return U.pVal[0];
}
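A small sketch of what the relaxed assertion permits, on the assumption that this change is part of the broader zero-bit-width APInt support:

#include "llvm/ADT/APInt.h"

void zeroWidthSketch() {
  llvm::APInt ZW(/*numBits=*/0, /*val=*/0); // zero-width value
  // This previously tripped the "zero width values not allowed" assertion;
  // after the change above it simply returns the single raw word (0).
  (void)ZW.getZExtValue();
}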
diff --git a/llvm/include/llvm/ADT/SCCIterator.h b/llvm/include/llvm/ADT/SCCIterator.h
index 8a7c0a78a0fc..ad35e09f0f74 100644
--- a/llvm/include/llvm/ADT/SCCIterator.h
+++ b/llvm/include/llvm/ADT/SCCIterator.h
@@ -28,6 +28,10 @@
#include <cassert>
#include <cstddef>
#include <iterator>
+#include <queue>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
#include <vector>
namespace llvm {
@@ -234,6 +238,135 @@ template <class T> scc_iterator<T> scc_end(const T &G) {
return scc_iterator<T>::end(G);
}
+/// Sort the nodes of a directed SCC in the decreasing order of the edge
+/// weights. The instantiating GraphT type should have a weighted edge type
+/// declared in its graph traits in order to use this iterator.
+///
+/// This is implemented using Kruskal's spanning tree algorithm followed by a
+/// BFS walk. First a maximum-weight spanning tree (forest) is built from all
+/// edges within the SCC collection. Then a BFS walk is initiated on tree nodes
+/// that do not have a predecessor. Finally, the BFS order computed is the
+/// traversal order of the nodes of the SCC. Such an order ensures that
+/// high-weight edges are visited first during the traversal.
+template <class GraphT, class GT = GraphTraits<GraphT>>
+class scc_member_iterator {
+ using NodeType = typename GT::NodeType;
+ using EdgeType = typename GT::EdgeType;
+ using NodesType = std::vector<NodeType *>;
+
+  // Auxiliary node information used during the MST calculation.
+ struct NodeInfo {
+ NodeInfo *Group = this;
+ uint32_t Rank = 0;
+ bool Visited = true;
+ };
+
+ // Find the root group of the node and compress the path from node to the
+ // root.
+ NodeInfo *find(NodeInfo *Node) {
+ if (Node->Group != Node)
+ Node->Group = find(Node->Group);
+ return Node->Group;
+ }
+
+ // Union the source and target node into the same group and return true.
+ // Returns false if they are already in the same group.
+ bool unionGroups(const EdgeType *Edge) {
+ NodeInfo *G1 = find(&NodeInfoMap[Edge->Source]);
+ NodeInfo *G2 = find(&NodeInfoMap[Edge->Target]);
+
+ // If the edge forms a cycle, do not add it to MST
+ if (G1 == G2)
+ return false;
+
+    // Make the smaller-rank tree a direct child of the root of the
+    // higher-rank tree.
+    if (G1->Rank < G2->Rank)
+      G1->Group = G2;
+    else {
+      G2->Group = G1;
+      // If the ranks are the same, increment the rank of the new root.
+      if (G1->Rank == G2->Rank)
+        G1->Rank++;
+ }
+ return true;
+ }
+
+ std::unordered_map<NodeType *, NodeInfo> NodeInfoMap;
+ NodesType Nodes;
+
+public:
+ scc_member_iterator(const NodesType &InputNodes);
+
+ NodesType &operator*() { return Nodes; }
+};
+
+template <class GraphT, class GT>
+scc_member_iterator<GraphT, GT>::scc_member_iterator(
+ const NodesType &InputNodes) {
+ if (InputNodes.size() <= 1) {
+ Nodes = InputNodes;
+ return;
+ }
+
+  // Initialize auxiliary node information.
+ NodeInfoMap.clear();
+ for (auto *Node : InputNodes) {
+    // This is specifically used to construct a `NodeInfo` object in place. An
+    // insert operation would involve a copy construction, which would
+    // invalidate the initial value of the `Group` field, which must be `this`.
+ (void)NodeInfoMap[Node].Group;
+ }
+
+ // Sort edges by weights.
+ struct EdgeComparer {
+ bool operator()(const EdgeType *L, const EdgeType *R) const {
+ return L->Weight > R->Weight;
+ }
+ };
+
+ std::multiset<const EdgeType *, EdgeComparer> SortedEdges;
+ for (auto *Node : InputNodes) {
+ for (auto &Edge : Node->Edges) {
+ if (NodeInfoMap.count(Edge.Target))
+ SortedEdges.insert(&Edge);
+ }
+ }
+
+ // Traverse all the edges and compute the Maximum Weight Spanning Tree
+ // using Kruskal's algorithm.
+ std::unordered_set<const EdgeType *> MSTEdges;
+ for (auto *Edge : SortedEdges) {
+ if (unionGroups(Edge))
+ MSTEdges.insert(Edge);
+ }
+
+  // Do BFS on the MST, starting from nodes that have no incoming edge. These
+  // nodes are the "roots" of the MST forest. This ensures that nodes are
+  // visited before their descendants are, which in turn ensures that hot
+  // edges are processed before cold edges, given how the MST is computed.
+ for (const auto *Edge : MSTEdges)
+ NodeInfoMap[Edge->Target].Visited = false;
+
+ std::queue<NodeType *> Queue;
+ for (auto &Node : NodeInfoMap)
+ if (Node.second.Visited)
+ Queue.push(Node.first);
+
+ while (!Queue.empty()) {
+ auto *Node = Queue.front();
+ Queue.pop();
+ Nodes.push_back(Node);
+ for (auto &Edge : Node->Edges) {
+ if (MSTEdges.count(&Edge) && !NodeInfoMap[Edge.Target].Visited) {
+ NodeInfoMap[Edge.Target].Visited = true;
+ Queue.push(Edge.Target);
+ }
+ }
+ }
+
+ assert(InputNodes.size() == Nodes.size() && "missing nodes in MST");
+ std::reverse(Nodes.begin(), Nodes.end());
+}
} // end namespace llvm
#endif // LLVM_ADT_SCCITERATOR_H
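A minimal sketch of the graph shape scc_member_iterator expects. DemoNode, DemoEdge, and DemoGraph are made-up names; only the members the iterator actually touches (Edges, Source, Target, Weight) are modeled:

#include "llvm/ADT/SCCIterator.h"
#include <cstdint>
#include <vector>

struct DemoNode;
struct DemoEdge {
  DemoNode *Source; // read by unionGroups()
  DemoNode *Target; // read by unionGroups() and the BFS walk
  uint64_t Weight;  // used to sort edges hot-to-cold
};
struct DemoNode {
  std::vector<DemoEdge> Edges; // scanned when collecting SCC-internal edges
};
struct DemoGraph {};

namespace llvm {
template <> struct GraphTraits<DemoGraph *> {
  using NodeType = DemoNode; // required by scc_member_iterator
  using EdgeType = DemoEdge; // required weighted edge type
};
} // namespace llvm

// Order one SCC's members so that high-weight edges are walked first.
void visitHotFirst(const std::vector<DemoNode *> &SCC) {
  llvm::scc_member_iterator<DemoGraph *> MemberIt(SCC);
  for (DemoNode *N : *MemberIt)
    (void)N; // nodes arrive in max-spanning-tree BFS order
}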
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 48f15b02283a..f9b658ca960a 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -1016,20 +1016,39 @@ public:
private:
std::tuple<RangeTs...> Ranges;
- template <size_t... Ns> iterator begin_impl(std::index_sequence<Ns...>) {
+ template <size_t... Ns>
+ iterator begin_impl(std::index_sequence<Ns...>) {
+ return iterator(std::get<Ns>(Ranges)...);
+ }
+ template <size_t... Ns>
+ iterator begin_impl(std::index_sequence<Ns...>) const {
return iterator(std::get<Ns>(Ranges)...);
}
template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) {
return iterator(make_range(std::end(std::get<Ns>(Ranges)),
std::end(std::get<Ns>(Ranges)))...);
}
+ template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) const {
+ return iterator(make_range(std::end(std::get<Ns>(Ranges)),
+ std::end(std::get<Ns>(Ranges)))...);
+ }
public:
concat_range(RangeTs &&... Ranges)
: Ranges(std::forward<RangeTs>(Ranges)...) {}
- iterator begin() { return begin_impl(std::index_sequence_for<RangeTs...>{}); }
- iterator end() { return end_impl(std::index_sequence_for<RangeTs...>{}); }
+ iterator begin() {
+ return begin_impl(std::index_sequence_for<RangeTs...>{});
+ }
+ iterator begin() const {
+ return begin_impl(std::index_sequence_for<RangeTs...>{});
+ }
+ iterator end() {
+ return end_impl(std::index_sequence_for<RangeTs...>{});
+ }
+ iterator end() const {
+ return end_impl(std::index_sequence_for<RangeTs...>{});
+ }
};
} // end namespace detail
@@ -1977,10 +1996,16 @@ public:
enumerator_iter<R> begin() {
return enumerator_iter<R>(0, std::begin(TheRange));
}
+ enumerator_iter<R> begin() const {
+ return enumerator_iter<R>(0, std::begin(TheRange));
+ }
enumerator_iter<R> end() {
return enumerator_iter<R>(std::end(TheRange));
}
+ enumerator_iter<R> end() const {
+ return enumerator_iter<R>(std::end(TheRange));
+ }
private:
R TheRange;
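A short sketch of what the new const overloads enable: concat (and enumerate) results can now be iterated through a const reference. The function name is an illustrative assumption:

#include "llvm/ADT/STLExtras.h"
#include <vector>

int sumBoth(const std::vector<int> &A, const std::vector<int> &B) {
  const auto Both = llvm::concat<const int>(A, B);
  int Sum = 0;
  for (int V : Both) // relies on the const begin()/end() added above
    Sum += V;
  return Sum;
}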
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index c26dbc457949..ea4c0312e073 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
@@ -50,6 +51,7 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
+ FMulAdd, ///< Fused multiply-add of floats (a * b + c).
SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
///< invariant
SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop
@@ -260,6 +262,12 @@ public:
SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi,
Loop *L) const;
+ /// Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
+ static bool isFMulAddIntrinsic(Instruction *I) {
+ return isa<IntrinsicInst>(I) &&
+ cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
+ }
+
private:
// The starting value of the recurrence.
// It does not have to be zero!
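A brief sketch of the new helper in use, assuming the enclosing class in IVDescriptors.h is RecurrenceDescriptor; the wrapper name is illustrative:

#include "llvm/Analysis/IVDescriptors.h"

bool isFMulAddReductionOp(llvm::Instruction *I) {
  // Matches calls such as:
  //   %sum.next = call float @llvm.fmuladd.f32(float %a, float %b, float %sum)
  // which can now be classified as RecurKind::FMulAdd reductions.
  return llvm::RecurrenceDescriptor::isFMulAddIntrinsic(I);
}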
diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/Analysis/Lint.h
index 6eb637e72782..4ceae2d29f16 100644
--- a/llvm/include/llvm/Analysis/Lint.h
+++ b/llvm/include/llvm/Analysis/Lint.h
@@ -6,11 +6,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines lint interfaces that can be used for some sanity checking
-// of input to the system, and for checking that transformations
-// haven't done something bad. In contrast to the Verifier, the Lint checker
-// checks for undefined behavior or constructions with likely unintended
-// behavior.
+// This file defines lint interfaces that can be used for some validation of
+// input to the system, and for checking that transformations haven't done
+// something bad. In contrast to the Verifier, the Lint checker checks for
+// undefined behavior or constructions with likely unintended behavior.
//
// To see what specifically is checked, look at Lint.cpp
//
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index a2260688e3d6..df50611832ce 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1378,6 +1378,8 @@ private:
/// includes an exact count and a maximum count.
///
class BackedgeTakenInfo {
+ friend class ScalarEvolution;
+
/// A list of computable exits and their not-taken counts. Loops almost
/// never have more than one computable exit.
SmallVector<ExitNotTakenInfo, 1> ExitNotTaken;
@@ -1398,9 +1400,6 @@ private:
/// True iff the backedge is taken either exactly Max or zero times.
bool MaxOrZero = false;
- /// SCEV expressions used in any of the ExitNotTakenInfo counts.
- SmallPtrSet<const SCEV *, 4> Operands;
-
bool isComplete() const { return IsComplete; }
const SCEV *getConstantMax() const { return ConstantMax; }
@@ -1466,10 +1465,6 @@ private:
/// Return true if the number of times this backedge is taken is either the
/// value returned by getConstantMax or zero.
bool isConstantMaxOrZero(ScalarEvolution *SE) const;
-
- /// Return true if any backedge taken count expressions refer to the given
- /// subexpression.
- bool hasOperand(const SCEV *S) const;
};
/// Cache the backedge-taken count of the loops for this function as they
@@ -1480,6 +1475,10 @@ private:
/// function as they are computed.
DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts;
+ /// Loops whose backedge taken counts directly use this non-constant SCEV.
+ DenseMap<const SCEV *, SmallPtrSet<PointerIntPair<const Loop *, 1, bool>, 4>>
+ BECountUsers;
+
/// This map contains entries for all of the PHI instructions that we
/// attempt to compute constant evolutions for. This allows us to avoid
/// potentially expensive recomputation of these properties. An instruction
@@ -1492,6 +1491,11 @@ private:
DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>>
ValuesAtScopes;
+  /// Reverse map for invalidation purposes: stores the SCEV and loop pairs
+  /// of which this SCEV is the value-at-scope.
+ DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>>
+ ValuesAtScopesUsers;
+
/// Memoized computeLoopDisposition results.
DenseMap<const SCEV *,
SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>>
@@ -1616,11 +1620,6 @@ private:
/// SCEV+Loop pair.
const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L);
- /// This looks up computed SCEV values for all instructions that depend on
- /// the given instruction and removes them from the ValueExprMap map if they
- /// reference SymName. This is used during PHI resolution.
- void forgetSymbolicName(Instruction *I, const SCEV *SymName);
-
/// Return the BackedgeTakenInfo for the given loop, lazily computing new
/// values if the loop hasn't been analyzed yet. The returned result is
/// guaranteed not to be predicated.
@@ -1911,6 +1910,9 @@ private:
bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
SCEV::NoWrapFlags &Flags);
+ /// Forget predicated/non-predicated backedge taken counts for the given loop.
+ void forgetBackedgeTakenCounts(const Loop *L, bool Predicated);
+
/// Drop memoized information for all \p SCEVs.
void forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs);
@@ -1923,6 +1925,9 @@ private:
/// Erase Value from ValueExprMap and ExprValueMap.
void eraseValueFromMap(Value *V);
+ /// Insert V to S mapping into ValueExprMap and ExprValueMap.
+ void insertValueToMap(Value *V, const SCEV *S);
+
/// Return false iff given SCEV contains a SCEVUnknown with NULL value-
/// pointer.
bool checkValidity(const SCEV *S) const;
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index ded53617b304..9c1abef33b28 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -1048,6 +1048,12 @@ TLI_DEFINE_STRING_INTERNAL("memset")
/// void memset_pattern16(void *b, const void *pattern16, size_t len);
TLI_DEFINE_ENUM_INTERNAL(memset_pattern16)
TLI_DEFINE_STRING_INTERNAL("memset_pattern16")
+/// void memset_pattern4(void *b, const void *pattern4, size_t len);
+TLI_DEFINE_ENUM_INTERNAL(memset_pattern4)
+TLI_DEFINE_STRING_INTERNAL("memset_pattern4")
+/// void memset_pattern8(void *b, const void *pattern8, size_t len);
+TLI_DEFINE_ENUM_INTERNAL(memset_pattern8)
+TLI_DEFINE_STRING_INTERNAL("memset_pattern8")
/// int mkdir(const char *path, mode_t mode);
TLI_DEFINE_ENUM_INTERNAL(mkdir)
TLI_DEFINE_STRING_INTERNAL("mkdir")
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 24e2318de48b..751c88a4ecbb 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -115,7 +115,7 @@ struct VFShape {
return {EC, Parameters};
}
- /// Sanity check on the Parameters in the VFShape.
+ /// Validation check on the Parameters in the VFShape.
bool hasValidParameterList() const;
};
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index a270fd399aeb..c199e933116a 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1602,6 +1602,13 @@ enum {
NT_FREEBSD_PROCSTAT_AUXV = 16,
};
+// NetBSD core note types.
+enum {
+ NT_NETBSDCORE_PROCINFO = 1,
+ NT_NETBSDCORE_AUXV = 2,
+ NT_NETBSDCORE_LWPSTATUS = 24,
+};
+
// OpenBSD core note types.
enum {
NT_OPENBSD_PROCINFO = 10,
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h
index ed3cd54df272..73d39fecc268 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -130,6 +130,7 @@ bool getEnableMachineFunctionSplitter();
bool getEnableDebugEntryValues();
bool getValueTrackingVariableLocations();
+Optional<bool> getExplicitValueTrackingVariableLocations();
bool getForceDwarfFrameSection();
@@ -170,6 +171,10 @@ void setFunctionAttributes(StringRef CPU, StringRef Features, Function &F);
/// Set function attributes of functions in Module M based on CPU,
/// Features, and command line flags.
void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M);
+
+/// Should value-tracking variable locations / instruction referencing be
+/// enabled by default for this triple?
+bool getDefaultValueTrackingVariableLocations(const llvm::Triple &T);
} // namespace codegen
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index ff4ad4b72636..f3fa652b0175 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -564,6 +564,7 @@ public:
/// This variant does not erase \p MI after calling the build function.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo);
bool matchFunnelShiftToRotate(MachineInstr &MI);
void applyFunnelShiftToRotate(MachineInstr &MI);
bool matchRotateOutOfRange(MachineInstr &MI);
@@ -648,6 +649,54 @@ public:
/// (fma fneg(x), fneg(y), z) -> (fma x, y, z)
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally,
+ bool &HasFMAD, bool &Aggressive,
+ bool CanReassociate = false);
+
+ /// Transform (fadd (fmul x, y), z) -> (fma x, y, z)
+ /// (fadd (fmul x, y), z) -> (fmad x, y, z)
+ bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ /// (fadd (fpext (fmul x, y)), z) -> (fmad (fpext x), (fpext y), z)
+ bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ /// (fadd (fmad x, y, (fmul u, v)), z) -> (fmad x, y, (fmad u, v, z))
+ bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ // Transform (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ // (fadd (fmad x, y, (fpext (fmul u, v))), z)
+ // -> (fmad x, y, (fmad (fpext u), (fpext v), z))
+ bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// Transform (fsub (fmul x, y), z) -> (fma x, y, -z)
+ /// (fsub (fmul x, y), z) -> (fmad x, y, -z)
+ bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+  /// Transform (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+  ///            (fsub (fneg (fmul x, y)), z) -> (fmad (fneg x), y, (fneg z))
+ bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// Transform (fsub (fpext (fmul x, y)), z)
+ /// -> (fma (fpext x), (fpext y), (fneg z))
+ /// (fsub (fpext (fmul x, y)), z)
+ /// -> (fmad (fpext x), (fpext y), (fneg z))
+ bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// Transform (fsub (fpext (fneg (fmul x, y))), z)
+ /// -> (fneg (fma (fpext x), (fpext y), z))
+ /// (fsub (fpext (fneg (fmul x, y))), z)
+ /// -> (fneg (fmad (fpext x), (fpext y), z))
+ bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index e813d030eec3..a41166bb4c6b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -129,6 +129,43 @@ inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) {
return SpecificConstantMatch(RequestedValue);
}
+/// Matcher for a specific constant splat.
+struct SpecificConstantSplatMatch {
+ int64_t RequestedVal;
+ SpecificConstantSplatMatch(int64_t RequestedVal)
+ : RequestedVal(RequestedVal) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ return isBuildVectorConstantSplat(Reg, MRI, RequestedVal,
+ /* AllowUndef */ false);
+ }
+};
+
+/// Matches a constant splat of \p RequestedValue.
+inline SpecificConstantSplatMatch m_SpecificICstSplat(int64_t RequestedValue) {
+ return SpecificConstantSplatMatch(RequestedValue);
+}
+
+/// Matcher for a specific constant or constant splat.
+struct SpecificConstantOrSplatMatch {
+ int64_t RequestedVal;
+ SpecificConstantOrSplatMatch(int64_t RequestedVal)
+ : RequestedVal(RequestedVal) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ int64_t MatchedVal;
+ if (mi_match(Reg, MRI, m_ICst(MatchedVal)) && MatchedVal == RequestedVal)
+ return true;
+ return isBuildVectorConstantSplat(Reg, MRI, RequestedVal,
+ /* AllowUndef */ false);
+ }
+};
+
+/// Matches a \p RequestedValue constant or a constant splat of \p
+/// RequestedValue.
+inline SpecificConstantOrSplatMatch
+m_SpecificICstOrSplat(int64_t RequestedValue) {
+ return SpecificConstantOrSplatMatch(RequestedValue);
+}
+
///{
/// Convenience matchers for specific integer values.
inline SpecificConstantMatch m_ZeroInt() { return SpecificConstantMatch(0); }
@@ -489,6 +526,11 @@ inline UnaryOp_match<SrcTy, TargetOpcode::COPY> m_Copy(SrcTy &&Src) {
return UnaryOp_match<SrcTy, TargetOpcode::COPY>(std::forward<SrcTy>(Src));
}
+template <typename SrcTy>
+inline UnaryOp_match<SrcTy, TargetOpcode::G_FSQRT> m_GFSqrt(const SrcTy &Src) {
+ return UnaryOp_match<SrcTy, TargetOpcode::G_FSQRT>(Src);
+}
+
// General helper for generic MI compares, i.e. G_ICMP and G_FCMP
// TODO: Allow checking a specific predicate.
template <typename Pred_P, typename LHS_P, typename RHS_P, unsigned Opcode>
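A sketch of the new splat matchers inside a hypothetical combine; the function name is illustrative:

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

bool isOneOrSplatOfOne(llvm::Register Reg,
                       const llvm::MachineRegisterInfo &MRI) {
  // True for G_CONSTANT 1 or a G_BUILD_VECTOR(_TRUNC) splat of 1; undef
  // elements are rejected by these matchers (AllowUndef is false).
  return llvm::MIPatternMatch::mi_match(
      Reg, MRI, llvm::MIPatternMatch::m_SpecificICstOrSplat(1));
}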
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 86545b976b8d..4126e2ac7b8f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -378,6 +378,18 @@ Optional<FPValueAndVReg> getFConstantSplat(Register VReg,
const MachineRegisterInfo &MRI,
bool AllowUndef = true);
+/// Return true if the specified register is defined by G_BUILD_VECTOR or
+/// G_BUILD_VECTOR_TRUNC where all of the elements are \p SplatValue or undef.
+bool isBuildVectorConstantSplat(const Register Reg,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef);
+
+/// Return true if the specified instruction is a G_BUILD_VECTOR or
+/// G_BUILD_VECTOR_TRUNC where all of the elements are \p SplatValue or undef.
+bool isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef);
+
/// Return true if the specified instruction is a G_BUILD_VECTOR or
/// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef.
bool isBuildVectorAllZeros(const MachineInstr &MI,
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index dcbd19ac6b5a..ec23dde0c6c0 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -938,7 +938,8 @@ public:
int64_t Offset, LLT Ty);
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
- return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size));
+ return getMachineMemOperand(
+ MMO, Offset, Size == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * Size));
}
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index fa22ca6a98ac..a855a0797723 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -139,10 +139,13 @@ public:
///
int getOffsetOfLocalArea() const { return LocalAreaOffset; }
- /// isFPCloseToIncomingSP - Return true if the frame pointer is close to
- /// the incoming stack pointer, false if it is close to the post-prologue
- /// stack pointer.
- virtual bool isFPCloseToIncomingSP() const { return true; }
+ /// Control the placement of special register scavenging spill slots when
+ /// allocating a stack frame.
+ ///
+ /// If this returns true, the frame indexes used by the RegScavenger will be
+ /// allocated closest to the incoming stack pointer.
+ virtual bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const;
/// assignCalleeSavedSpillSlots - Allows target to override spill slot
/// assignment logic. If implemented, assignCalleeSavedSpillSlots() should
@@ -220,6 +223,9 @@ public:
virtual void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologueMBB) const {}
+ /// Does the stack probe function call return with a modified stack pointer?
+ virtual bool stackProbeFunctionModifiesSP() const { return false; }
+
/// Adjust the prologue to have the function use segmented stacks. This works
/// by adding a check even before the "normal" function prologue.
virtual void adjustForSegmentedStacks(MachineFunction &MF,
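A sketch of a backend adopting the replacement hook; MyTargetFrameLowering is hypothetical, the base-class constructor arguments are illustrative, and the class stays abstract since the pure virtual prologue/epilogue hooks are omitted:

#include "llvm/CodeGen/TargetFrameLowering.h"

class MyTargetFrameLowering : public llvm::TargetFrameLowering {
public:
  MyTargetFrameLowering()
      : TargetFrameLowering(StackGrowsDown, llvm::Align(16),
                            /*LocalAreaOffset=*/0) {}

  // Replaces the removed isFPCloseToIncomingSP(): decide per function where
  // the register scavenger's spill slots should be placed.
  bool allocateScavengingFrameIndexesNearIncomingSP(
      const llvm::MachineFunction &MF) const override {
    return hasFP(MF); // near the incoming SP only when an FP is available
  }
};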
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8bc730a3eda5..d43dd9fac85d 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1913,6 +1913,12 @@ public:
"Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!");
}
+ /// Optional target hook to create the LLVM IR attributes for the outlined
+ /// function. If overridden, the overriding function must call the default
+ /// implementation.
+ virtual void mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const;
+
/// Returns how or if \p MI should be outlined.
virtual outliner::InstrType
getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 87f5168ec48f..d862701c37d7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -425,6 +425,12 @@ public:
return true;
}
+ /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded
+ /// using generic code in SelectionDAGBuilder.
+ virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const {
+ return true;
+ }
+
/// Return true if it is profitable to convert a select of FP constants into
/// a constant pool load whose address depends on the select condition. The
/// parameter may be used to differentiate a select with FP compare from
@@ -806,9 +812,12 @@ public:
/// Return true if target always benefits from combining into FMA for a
/// given value type. This must typically return false on targets where FMA
/// takes more cycles to execute than FADD.
- virtual bool enableAggressiveFMAFusion(EVT VT) const {
- return false;
- }
+ virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; }
+
+ /// Return true if target always benefits from combining into FMA for a
+ /// given value type. This must typically return false on targets where FMA
+ /// takes more cycles to execute than FADD.
+ virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; }
/// Return the ValueType of the result of SETCC operations.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
@@ -2710,6 +2719,14 @@ public:
/// Return true if an fpext operation input to an \p Opcode operation is free
/// (for instance, because half-precision floating-point numbers are
/// implicitly extended to float-precision) for an FMA instruction.
+ virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
+ LLT DestTy, LLT SrcTy) const {
+ return false;
+ }
+
+ /// Return true if an fpext operation input to an \p Opcode operation is free
+ /// (for instance, because half-precision floating-point numbers are
+ /// implicitly extended to float-precision) for an FMA instruction.
virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
EVT DestVT, EVT SrcVT) const {
assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
@@ -2748,11 +2765,47 @@ public:
return false;
}
+ /// Return true if an FMA operation is faster than a pair of fmul and fadd
+ /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
+ /// returns true, otherwise fmuladd is expanded to fmul + fadd.
+ ///
+ /// NOTE: This may be called before legalization on types for which FMAs are
+ /// not legal, but should return true if those types will eventually legalize
+ /// to types that support FMAs. After legalization, it will only be called on
+ /// types that support FMAs (via Legal or Custom actions)
+ virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ LLT) const {
+ return false;
+ }
+
/// IR version
virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
return false;
}
+  /// Returns true if \p MI can be combined with another instruction to
+  /// form TargetOpcode::G_FMAD. \p MI may be a TargetOpcode::G_FADD,
+  /// TargetOpcode::G_FSUB, or a TargetOpcode::G_FMUL which will be
+  /// distributed into an fadd/fsub.
+ virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const {
+ assert((MI.getOpcode() == TargetOpcode::G_FADD ||
+ MI.getOpcode() == TargetOpcode::G_FSUB ||
+ MI.getOpcode() == TargetOpcode::G_FMUL) &&
+ "unexpected node in FMAD forming combine");
+ switch (Ty.getScalarSizeInBits()) {
+ case 16:
+ return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16);
+ case 32:
+ return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32);
+ case 64:
+ return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64);
+ default:
+ break;
+ }
+
+ return false;
+ }
+
  /// Returns true if \p N can be combined with another node to form an
  /// ISD::FMAD. \p N may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which
  /// will be distributed into an fadd/fsub.
@@ -2852,6 +2905,12 @@ public:
/// passed to the fp16 to fp conversion library function.
virtual bool shouldKeepZExtForFP16Conv() const { return false; }
+  /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type
+  /// VT for min(max(fptoi)) saturation patterns.
+ virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const {
+ return isOperationLegalOrCustom(Op, VT);
+ }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
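A sketch of how a GlobalISel combine might consult the new LLT-based hooks; the free function and its gating policy are illustrative assumptions:

#include "llvm/CodeGen/TargetLowering.h"

// Gate G_FMA formation the way the DAG combines gate theirs, but on LLT.
bool worthFormingFMA(const llvm::TargetLowering &TLI,
                     const llvm::MachineFunction &MF, llvm::LLT Ty) {
  return TLI.isFMAFasterThanFMulAndFAdd(MF, Ty) ||
         TLI.enableAggressiveFMAFusion(Ty);
}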
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 902973ff5722..ae1afeb668be 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -156,6 +156,11 @@ public:
NormalUnits.getNumInfoUnits());
}
+ const DWARFUnitVector &getNormalUnitsVector() {
+ parseNormalUnits();
+ return NormalUnits;
+ }
+
/// Get units from .debug_types in this context.
unit_iterator_range types_section_units() {
parseNormalUnits();
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index d471b80c7fe1..505686bfbf59 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -14,6 +14,7 @@
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include <cstdint>
#include <map>
#include <set>
@@ -153,8 +154,8 @@ private:
/// \param SectionKind The object-file section kind that S comes from.
///
/// \returns The number of errors that occurred during verification.
- unsigned verifyUnitSection(const DWARFSection &S,
- DWARFSectionKind SectionKind);
+ unsigned verifyUnitSection(const DWARFSection &S);
+ unsigned verifyUnits(const DWARFUnitVector &Units);
/// Verifies that a call site entry is nested within a subprogram with a
/// DW_AT_call attribute.
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index 362e8ab8e296..2180be3341e1 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -519,6 +519,7 @@ private:
/// symbols of an error.
class MaterializationResponsibility {
friend class ExecutionSession;
+ friend class JITDylib;
public:
MaterializationResponsibility(MaterializationResponsibility &&) = delete;
@@ -535,10 +536,10 @@ public:
/// Returns the target JITDylib that these symbols are being materialized
/// into.
- JITDylib &getTargetJITDylib() const { return *JD; }
+ JITDylib &getTargetJITDylib() const { return JD; }
/// Returns the ExecutionSession for this instance.
- ExecutionSession &getExecutionSession();
+ ExecutionSession &getExecutionSession() const;
/// Returns the symbol flags map for this responsibility instance.
/// Note: The returned flags may have transient flags (Lazy, Materializing)
@@ -640,15 +641,16 @@ public:
private:
/// Create a MaterializationResponsibility for the given JITDylib and
/// initial symbols.
- MaterializationResponsibility(JITDylibSP JD, SymbolFlagsMap SymbolFlags,
+ MaterializationResponsibility(ResourceTrackerSP RT,
+ SymbolFlagsMap SymbolFlags,
SymbolStringPtr InitSymbol)
- : JD(std::move(JD)), SymbolFlags(std::move(SymbolFlags)),
- InitSymbol(std::move(InitSymbol)) {
- assert(this->JD && "Cannot initialize with null JITDylib");
+ : JD(RT->getJITDylib()), RT(std::move(RT)),
+ SymbolFlags(std::move(SymbolFlags)), InitSymbol(std::move(InitSymbol)) {
assert(!this->SymbolFlags.empty() && "Materializing nothing?");
}
- JITDylibSP JD;
+ JITDylib &JD;
+ ResourceTrackerSP RT;
SymbolFlagsMap SymbolFlags;
SymbolStringPtr InitSymbol;
};
@@ -913,12 +915,26 @@ public:
const SymbolLookupSet &LookupSet) = 0;
};
-/// A symbol table that supports asynchoronous symbol queries.
+/// Represents a JIT'd dynamic library.
+///
+/// This class aims to mimic the behavior of a regular dylib or shared object,
+/// but without requiring the contained program representations to be compiled
+/// up-front. The JITDylib's content is defined by adding MaterializationUnits,
+/// and contained MaterializationUnits will typically rely on the JITDylib's
+/// links-against order to resolve external references (similar to a regular
+/// dylib).
+///
+/// The JITDylib object is a thin wrapper that references state held by the
+/// ExecutionSession. JITDylibs can be removed, clearing this underlying state
+/// and leaving the JITDylib object in a defunct state. In this state the
+/// JITDylib's name is guaranteed to remain accessible. If the ExecutionSession
+/// is still alive then other operations are callable but will return an Error
+/// or null result (depending on the API). It is illegal to call any operation
+/// other than getName on a JITDylib after the ExecutionSession has been torn
+/// down.
///
-/// Represents a virtual shared object. Instances can not be copied or moved, so
-/// their addresses may be used as keys for resource management.
-/// JITDylib state changes must be made via an ExecutionSession to guarantee
-/// that they are synchronized with respect to other JITDylib operations.
+/// JITDylibs cannot be moved or copied. Their address is stable, and useful as
+/// a key in some JIT data structures.
class JITDylib : public ThreadSafeRefCountedBase<JITDylib>,
public jitlink::JITLinkDylib {
friend class AsynchronousSymbolQuery;
@@ -931,10 +947,21 @@ public:
JITDylib &operator=(const JITDylib &) = delete;
JITDylib(JITDylib &&) = delete;
JITDylib &operator=(JITDylib &&) = delete;
+ ~JITDylib();
/// Get a reference to the ExecutionSession for this JITDylib.
+ ///
+ /// It is legal to call this method on a defunct JITDylib, however the result
+  /// will only be usable if the ExecutionSession is still alive. If this JITDylib
+ /// is held by an error that may have torn down the JIT then the result
+ /// should not be used.
ExecutionSession &getExecutionSession() const { return ES; }
+ /// Dump current JITDylib state to OS.
+ ///
+ /// It is legal to call this method on a defunct JITDylib.
+ void dump(raw_ostream &OS);
+
/// Calls remove on all trackers currently associated with this JITDylib.
/// Does not run static deinits.
///
@@ -942,12 +969,21 @@ public:
/// added concurrently while the clear is underway, and the newly added
/// code will *not* be cleared. Adding new code concurrently with a clear
/// is usually a bug and should be avoided.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
Error clear();
/// Get the default resource tracker for this JITDylib.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
ResourceTrackerSP getDefaultResourceTracker();
/// Create a resource tracker for this JITDylib.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
ResourceTrackerSP createResourceTracker();
  /// Adds a definition generator to this JITDylib and returns a reference to
@@ -956,6 +992,9 @@ public:
/// When JITDylibs are searched during lookup, if no existing definition of
/// a symbol is found, then any generators that have been added are run (in
/// the order that they were added) to potentially generate a definition.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
template <typename GeneratorT>
GeneratorT &addGenerator(std::unique_ptr<GeneratorT> DefGenerator);
@@ -963,6 +1002,9 @@ public:
///
/// The given generator must exist in this JITDylib's generators list (i.e.
/// have been added and not yet removed).
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void removeGenerator(DefinitionGenerator &G);
/// Set the link order to be used when fixing up definitions in JITDylib.
@@ -983,26 +1025,41 @@ public:
/// as the first in the link order (instead of this dylib) ensures that
/// definitions within this dylib resolve to the lazy-compiling stubs,
/// rather than immediately materializing the definitions in this dylib.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void setLinkOrder(JITDylibSearchOrder NewSearchOrder,
bool LinkAgainstThisJITDylibFirst = true);
/// Add the given JITDylib to the link order for definitions in this
/// JITDylib.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void addToLinkOrder(JITDylib &JD,
JITDylibLookupFlags JDLookupFlags =
JITDylibLookupFlags::MatchExportedSymbolsOnly);
/// Replace OldJD with NewJD in the link order if OldJD is present.
/// Otherwise this operation is a no-op.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD,
JITDylibLookupFlags JDLookupFlags =
JITDylibLookupFlags::MatchExportedSymbolsOnly);
/// Remove the given JITDylib from the link order for this JITDylib if it is
/// present. Otherwise this operation is a no-op.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void removeFromLinkOrder(JITDylib &JD);
/// Do something with the link order (run under the session lock).
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
template <typename Func>
auto withLinkOrderDo(Func &&F)
-> decltype(F(std::declval<const JITDylibSearchOrder &>()));
@@ -1014,6 +1071,9 @@ public:
///
/// This overload always takes ownership of the MaterializationUnit. If any
  /// errors occur, the MaterializationUnit is consumed.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
template <typename MaterializationUnitType>
Error define(std::unique_ptr<MaterializationUnitType> &&MU,
ResourceTrackerSP RT = nullptr);
@@ -1025,6 +1085,9 @@ public:
/// generated. If an error occurs, ownership remains with the caller. This
/// may allow the caller to modify the MaterializationUnit to correct the
/// issue, then re-call define.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
template <typename MaterializationUnitType>
Error define(std::unique_ptr<MaterializationUnitType> &MU,
ResourceTrackerSP RT = nullptr);
@@ -1039,28 +1102,40 @@ public:
///
/// On success, all symbols are removed. On failure, the JITDylib state is
/// left unmodified (no symbols are removed).
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
Error remove(const SymbolNameSet &Names);
- /// Dump current JITDylib state to OS.
- void dump(raw_ostream &OS);
-
/// Returns the given JITDylibs and all of their transitive dependencies in
/// DFS order (based on linkage relationships). Each JITDylib will appear
/// only once.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
static std::vector<JITDylibSP> getDFSLinkOrder(ArrayRef<JITDylibSP> JDs);
  /// Returns the given JITDylibs and all of their transitive dependencies in
/// reverse DFS order (based on linkage relationships). Each JITDylib will
/// appear only once.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
static std::vector<JITDylibSP>
getReverseDFSLinkOrder(ArrayRef<JITDylibSP> JDs);
/// Return this JITDylib and its transitive dependencies in DFS order
/// based on linkage relationships.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
std::vector<JITDylibSP> getDFSLinkOrder();
  /// Return this JITDylib and its transitive dependencies in reverse DFS order
/// based on linkage relationships.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
std::vector<JITDylibSP> getReverseDFSLinkOrder();
private:
@@ -1151,7 +1226,6 @@ private:
JITDylib(ExecutionSession &ES, std::string Name);
- ResourceTrackerSP getTracker(MaterializationResponsibility &MR);
std::pair<AsynchronousSymbolQuerySet, std::shared_ptr<SymbolDependenceMap>>
removeTracker(ResourceTracker &RT);
@@ -1197,8 +1271,8 @@ private:
failSymbols(FailedSymbolsWorklist);
ExecutionSession &ES;
+ enum { Open, Closing, Closed } State = Open;
std::mutex GeneratorsMutex;
- bool Open = true;
SymbolTable Symbols;
UnmaterializedInfosMap UnmaterializedInfos;
MaterializingInfosMap MaterializingInfos;
@@ -1208,7 +1282,8 @@ private:
// Map trackers to sets of symbols tracked.
DenseMap<ResourceTracker *, SymbolNameVector> TrackerSymbols;
- DenseMap<MaterializationResponsibility *, ResourceTracker *> MRTrackers;
+ DenseMap<ResourceTracker *, DenseSet<MaterializationResponsibility *>>
+ TrackerMRs;
};
/// Platforms set up standard symbols and mediate interactions between dynamic
@@ -1363,6 +1438,18 @@ public:
/// If no Platform is attached this call is equivalent to createBareJITDylib.
Expected<JITDylib &> createJITDylib(std::string Name);
+ /// Closes the given JITDylib.
+ ///
+ /// This method clears all resources held for the JITDylib, puts it in the
+ /// closed state, and clears all references held by the ExecutionSession and
+ /// other JITDylibs. No further code can be added to the JITDylib, and the
+  /// object will be freed once all remaining JITDylibSPs referring to it are
+  /// destroyed.
+ ///
+ /// This method does *not* run static destructors.
+ ///
+ /// This method can only be called once for each JITDylib.
+ Error removeJITDylib(JITDylib &JD);
+
/// Set the error reporter function.
ExecutionSession &setErrorReporter(ErrorReporter ReportError) {
this->ReportError = std::move(ReportError);
@@ -1574,9 +1661,9 @@ private:
SymbolStringPtr InitSymbol) {
auto &JD = RT.getJITDylib();
std::unique_ptr<MaterializationResponsibility> MR(
- new MaterializationResponsibility(&JD, std::move(Symbols),
+ new MaterializationResponsibility(&RT, std::move(Symbols),
std::move(InitSymbol)));
- JD.MRTrackers[MR.get()] = &RT;
+ JD.TrackerMRs[&RT].insert(MR.get());
return MR;
}
@@ -1660,18 +1747,17 @@ private:
JITDispatchHandlers;
};
-inline ExecutionSession &MaterializationResponsibility::getExecutionSession() {
- return JD->getExecutionSession();
+inline ExecutionSession &
+MaterializationResponsibility::getExecutionSession() const {
+ return JD.getExecutionSession();
}
template <typename Func>
Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const {
- return JD->getExecutionSession().runSessionLocked([&]() -> Error {
- auto I = JD->MRTrackers.find(this);
- assert(I != JD->MRTrackers.end() && "No tracker for this MR");
- if (I->second->isDefunct())
- return make_error<ResourceTrackerDefunct>(I->second);
- F(I->second->getKeyUnsafe());
+ return JD.getExecutionSession().runSessionLocked([&]() -> Error {
+ if (RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(RT);
+ F(RT->getKeyUnsafe());
return Error::success();
});
}
@@ -1679,14 +1765,17 @@ Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const {
template <typename GeneratorT>
GeneratorT &JITDylib::addGenerator(std::unique_ptr<GeneratorT> DefGenerator) {
auto &G = *DefGenerator;
- std::lock_guard<std::mutex> Lock(GeneratorsMutex);
- DefGenerators.push_back(std::move(DefGenerator));
+ ES.runSessionLocked([&] {
+ assert(State == Open && "Cannot add generator to closed JITDylib");
+ DefGenerators.push_back(std::move(DefGenerator));
+ });
return G;
}
template <typename Func>
auto JITDylib::withLinkOrderDo(Func &&F)
-> decltype(F(std::declval<const JITDylibSearchOrder &>())) {
+ assert(State == Open && "Cannot use link order of closed JITDylib");
return ES.runSessionLocked([&]() { return F(LinkOrder); });
}
@@ -1715,6 +1804,8 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU,
});
return ES.runSessionLocked([&, this]() -> Error {
+ assert(State == Open && "JD is defunct");
+
if (auto Err = defineImpl(*MU))
return Err;
@@ -1756,6 +1847,8 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU,
});
return ES.runSessionLocked([&, this]() -> Error {
+ assert(State == Open && "JD is defunct");
+
if (auto Err = defineImpl(*MU))
return Err;
@@ -1800,50 +1893,50 @@ private:
// ---------------------------------------------
inline MaterializationResponsibility::~MaterializationResponsibility() {
- JD->getExecutionSession().OL_destroyMaterializationResponsibility(*this);
+ getExecutionSession().OL_destroyMaterializationResponsibility(*this);
}
inline SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const {
- return JD->getExecutionSession().OL_getRequestedSymbols(*this);
+ return getExecutionSession().OL_getRequestedSymbols(*this);
}
inline Error MaterializationResponsibility::notifyResolved(
const SymbolMap &Symbols) {
- return JD->getExecutionSession().OL_notifyResolved(*this, Symbols);
+ return getExecutionSession().OL_notifyResolved(*this, Symbols);
}
inline Error MaterializationResponsibility::notifyEmitted() {
- return JD->getExecutionSession().OL_notifyEmitted(*this);
+ return getExecutionSession().OL_notifyEmitted(*this);
}
inline Error MaterializationResponsibility::defineMaterializing(
SymbolFlagsMap SymbolFlags) {
- return JD->getExecutionSession().OL_defineMaterializing(
- *this, std::move(SymbolFlags));
+ return getExecutionSession().OL_defineMaterializing(*this,
+ std::move(SymbolFlags));
}
inline void MaterializationResponsibility::failMaterialization() {
- JD->getExecutionSession().OL_notifyFailed(*this);
+ getExecutionSession().OL_notifyFailed(*this);
}
inline Error MaterializationResponsibility::replace(
std::unique_ptr<MaterializationUnit> MU) {
- return JD->getExecutionSession().OL_replace(*this, std::move(MU));
+ return getExecutionSession().OL_replace(*this, std::move(MU));
}
inline Expected<std::unique_ptr<MaterializationResponsibility>>
MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) {
- return JD->getExecutionSession().OL_delegate(*this, Symbols);
+ return getExecutionSession().OL_delegate(*this, Symbols);
}
inline void MaterializationResponsibility::addDependencies(
const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies) {
- JD->getExecutionSession().OL_addDependencies(*this, Name, Dependencies);
+ getExecutionSession().OL_addDependencies(*this, Name, Dependencies);
}
inline void MaterializationResponsibility::addDependenciesForAll(
const SymbolDependenceMap &Dependencies) {
- JD->getExecutionSession().OL_addDependenciesForAll(*this, Dependencies);
+ getExecutionSession().OL_addDependenciesForAll(*this, Dependencies);
}
} // End namespace orc
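A small sketch of tearing down a dylib with the new ExecutionSession API; error handling is left to the caller:

#include "llvm/ExecutionEngine/Orc/Core.h"

llvm::Error closeDylib(llvm::orc::ExecutionSession &ES,
                       llvm::orc::JITDylib &JD) {
  // Clears JD's resources and marks it defunct; afterwards only getName()
  // and dump() remain safe to call. Static destructors are NOT run.
  return ES.removeJITDylib(JD);
}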
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index 2fec3e7e4230..d2f9bac16e5a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -120,6 +120,10 @@ enum class OMPScheduleType {
Runtime = 37,
Auto = 38, // auto
+ StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd)
+ GuidedSimd = 46, // guided with chunk adjustment
+ RuntimeSimd = 47, // runtime with chunk adjustment
+
ModifierMonotonic =
(1 << 29), // Set if the monotonic schedule modifier was present
ModifierNonmonotonic =
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index b4e099e4ec20..bcf52278ccbb 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1670,32 +1670,6 @@ public:
return CreateAlignedLoad(Ty, Ptr, MaybeAlign(), isVolatile, Name);
}
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr,
- const char *Name),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name);
- }
-
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr,
- const Twine &Name = ""),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name);
- }
-
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr,
- bool isVolatile,
- const Twine &Name = ""),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, isVolatile,
- Name);
- }
-
StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
return CreateAlignedStore(Val, Ptr, MaybeAlign(), isVolatile);
}
@@ -1719,35 +1693,6 @@ public:
return Insert(new LoadInst(Ty, Ptr, Twine(), isVolatile, *Align), Name);
}
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr,
- MaybeAlign Align,
- const char *Name),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
- Align, Name);
- }
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr,
- MaybeAlign Align,
- const Twine &Name = ""),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
- Align, Name);
- }
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr,
- MaybeAlign Align,
- bool isVolatile,
- const Twine &Name = ""),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
- Align, isVolatile, Name);
- }
-
StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align,
bool isVolatile = false) {
if (!Align) {
@@ -1788,14 +1733,6 @@ public:
return Insert(new AtomicRMWInst(Op, Ptr, Val, *Align, Ordering, SSID));
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateGEP(Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateGEP(Ptr->getType()->getScalarType()->getPointerElementType(),
- Ptr, IdxList, Name);
- }
-
Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name = "") {
if (auto *PC = dyn_cast<Constant>(Ptr)) {
@@ -1810,15 +1747,6 @@ public:
return Insert(GetElementPtrInst::Create(Ty, Ptr, IdxList), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateInBoundsGEP(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList,
- Name);
- }
-
Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name = "") {
if (auto *PC = dyn_cast<Constant>(Ptr)) {
@@ -1849,15 +1777,6 @@ public:
return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstGEP1_32(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Name);
- }
-
Value *CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0,
const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
@@ -1914,15 +1833,6 @@ public:
return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstGEP1_64(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Name);
- }
-
Value *CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0,
const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
@@ -1933,15 +1843,6 @@ public:
return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstInBoundsGEP1_64(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Name);
- }
-
Value *CreateConstGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1,
const Twine &Name = "") {
Value *Idxs[] = {
@@ -1955,15 +1856,6 @@ public:
return Insert(GetElementPtrInst::Create(Ty, Ptr, Idxs), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstGEP2_64(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Idx1, Name);
- }
-
Value *CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0,
uint64_t Idx1, const Twine &Name = "") {
Value *Idxs[] = {
@@ -1977,28 +1869,11 @@ public:
return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0,
- uint64_t Idx1, const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstInBoundsGEP2_64(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Idx1, Name);
- }
-
Value *CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx,
const Twine &Name = "") {
return CreateConstInBoundsGEP2_32(Ty, Ptr, 0, Idx, Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstInBoundsGEP2_32(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, 0, Idx,
- Name);
- }
-
/// Same as CreateGlobalString, but return a pointer with "i8*" type
/// instead of a pointer to array of i8.
///
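The overloads removed above inferred the pointee type from the pointer operand, which no longer works with opaque pointers. A minimal migration sketch, assuming B is an llvm::IRBuilder<> and Ptr points at an i32 (names illustrative, not part of this header):

    // Before (removed): B.CreateLoad(Ptr); B.CreateGEP(Ptr, Idx);
    // After: spell out the loaded/indexed type at each call site.
    llvm::Value *V = B.CreateLoad(B.getInt32Ty(), Ptr, "val");
    llvm::Value *E = B.CreateGEP(B.getInt32Ty(), Ptr, B.getInt64(0), "elt");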
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 6d32a898b668..046e9b5e809e 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -975,15 +975,6 @@ public:
NameStr, InsertAtEnd);
}
- LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds(
- Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr = "",
- Instruction *InsertBefore = nullptr),
- "Use the version with explicit element type instead") {
- return CreateInBounds(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList,
- NameStr, InsertBefore);
- }
-
/// Create an "inbounds" getelementptr. See the documentation for the
/// "inbounds" flag in LangRef.html for details.
static GetElementPtrInst *
@@ -996,15 +987,6 @@ public:
return GEP;
}
- LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds(
- Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr,
- BasicBlock *InsertAtEnd),
- "Use the version with explicit element type instead") {
- return CreateInBounds(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList,
- NameStr, InsertAtEnd);
- }
-
static GetElementPtrInst *CreateInBounds(Type *PointeeType, Value *Ptr,
ArrayRef<Value *> IdxList,
const Twine &NameStr,
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 8290342c0d51..b01fa10763b8 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -524,6 +524,20 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">,
Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>;
+ // BCD intrinsics.
+ def int_ppc_bcdadd : GCCBuiltin<"__builtin_ppc_bcdadd">, Intrinsic<
+ [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_ppc_bcdadd_p : GCCBuiltin<"__builtin_ppc_bcdadd_p">, Intrinsic<
+ [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+ def int_ppc_bcdsub : GCCBuiltin<"__builtin_ppc_bcdsub">, Intrinsic<
+ [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_ppc_bcdsub_p : GCCBuiltin<"__builtin_ppc_bcdsub_p">, Intrinsic<
+ [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+
// P10 Vector Extract with Mask
def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
@@ -1073,6 +1087,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
GCCBuiltin<"__builtin_altivec_crypto_vpermxor">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_crypto_vpermxor_be :
+ GCCBuiltin<"__builtin_altivec_crypto_vpermxor_be">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vshasigmad :
GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">,
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index b83d83f0d0ab..7d232bba0864 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -250,8 +250,16 @@ public:
bool operator!=(const FastMathFlags &OtherFlags) const {
return Flags != OtherFlags.Flags;
}
+
+ /// Print fast-math flags to \p O.
+ void print(raw_ostream &O) const;
};
+inline raw_ostream &operator<<(raw_ostream &O, FastMathFlags FMF) {
+ FMF.print(O);
+ return O;
+}
+
/// Utility class for floating point operations which can have
/// information about relaxed accuracy requirements attached to them.
class FPMathOperator : public Operator {
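The new print method and stream operator make the flags directly printable; a small sketch, assuming I is an llvm::Instruction* that may carry fast-math flags:

    if (auto *FPOp = llvm::dyn_cast<llvm::FPMathOperator>(I))
      llvm::errs() << "fast-math flags: " << FPOp->getFastMathFlags() << "\n";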
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index b858733530e3..320deb80bb1f 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2285,6 +2285,31 @@ m_Not(const ValTy &V) {
return m_c_Xor(V, m_AllOnes());
}
+template <typename ValTy> struct NotForbidUndef_match {
+ ValTy Val;
+ NotForbidUndef_match(const ValTy &V) : Val(V) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ // We do not use m_c_Xor because that could match an arbitrary APInt that is
+ // not -1 as C and then fail to match the other operand if it is -1.
+ // This code should still work even when both operands are constants.
+ Value *X;
+ const APInt *C;
+ if (m_Xor(m_Value(X), m_APIntForbidUndef(C)).match(V) && C->isAllOnes())
+ return Val.match(X);
+ if (m_Xor(m_APIntForbidUndef(C), m_Value(X)).match(V) && C->isAllOnes())
+ return Val.match(X);
+ return false;
+ }
+};
+
+/// Matches a bitwise 'not' as 'xor V, -1' or 'xor -1, V'. For vectors, the
+/// constant value must be composed of only -1 scalar elements.
+template <typename ValTy>
+inline NotForbidUndef_match<ValTy> m_NotForbidUndef(const ValTy &V) {
+ return NotForbidUndef_match<ValTy>(V);
+}
+
/// Matches an SMin with LHS and RHS in either order.
template <typename LHS, typename RHS>
inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>
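Unlike m_Not, which also matches when a vector all-ones constant contains undef lanes, the new matcher requires every lane to be -1. A hedged usage sketch, assuming V is an llvm::Value*:

    using namespace llvm::PatternMatch;
    llvm::Value *X;
    if (match(V, m_NotForbidUndef(m_Value(X)))) {
      // V is `xor X, -1` (either operand order) with no undef lanes in the
      // all-ones constant; X is bound to the other operand.
    }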
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 47431adc6fac..c899c46d4055 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -368,6 +368,8 @@ public:
Type *getPointerElementType() const {
assert(getTypeID() == PointerTyID);
+ assert(NumContainedTys &&
+ "Attempting to get element type of opaque pointer");
return ContainedTys[0];
}
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 361d6357b303..a3c6b4e70bf5 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -38,7 +38,7 @@
// is one VP intrinsic that maps directly to one SDNode that goes by the
// same name. Since the operands are also the same, we open the property
// scopes for both the VPIntrinsic and the SDNode at once.
-// \p SDOPC The SelectionDAG Node id (eg VP_ADD).
+// \p VPSD The SelectionDAG Node id (eg VP_ADD).
// \p LEGALPOS The operand position of the SDNode that is used for legalizing
// this SDNode. This can be `-1`, in which case the return type of
// the SDNode is used.
@@ -46,12 +46,12 @@
// \p MASKPOS The mask operand position.
// \p EVLPOS The explicit vector length operand position.
#ifndef BEGIN_REGISTER_VP_SDNODE
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS)
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS)
#endif
// End the property scope of a new VP SDNode.
#ifndef END_REGISTER_VP_SDNODE
-#define END_REGISTER_VP_SDNODE(SDOPC)
+#define END_REGISTER_VP_SDNODE(VPSD)
#endif
// Helper macros for the common "1:1 - Intrinsic : SDNode" case.
@@ -60,22 +60,21 @@
// same name. Since the operands are also the same, we open the property
// scopes for both the VPIntrinsic and the SDNode at once.
//
-// \p INTRIN The canonical name (eg `vp_add`, which at the same time is the
+// \p VPID The canonical name (eg `vp_add`, which at the same time is the
// name of the intrinsic and the TableGen def of the SDNode).
// \p MASKPOS The mask operand position.
// \p EVLPOS The explicit vector length operand position.
-// \p SDOPC The SelectionDAG Node id (eg VP_ADD).
+// \p VPSD The SelectionDAG Node id (eg VP_ADD).
// \p LEGALPOS The operand position of the SDNode that is used for legalizing
// this SDNode. This can be `-1`, in which case the return type of
// the SDNode is used.
-#define BEGIN_REGISTER_VP(INTRIN, MASKPOS, EVLPOS, SDOPC, LEGALPOS) \
-BEGIN_REGISTER_VP_INTRINSIC(INTRIN, MASKPOS, EVLPOS) \
-BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, INTRIN, MASKPOS, EVLPOS)
-
-#define END_REGISTER_VP(INTRIN, SDOPC) \
-END_REGISTER_VP_INTRINSIC(INTRIN) \
-END_REGISTER_VP_SDNODE(SDOPC)
+#define BEGIN_REGISTER_VP(VPID, MASKPOS, EVLPOS, VPSD, LEGALPOS) \
+ BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS) \
+ BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, VPID, MASKPOS, EVLPOS)
+#define END_REGISTER_VP(VPID, VPSD) \
+ END_REGISTER_VP_INTRINSIC(VPID) \
+ END_REGISTER_VP_SDNODE(VPSD)
// The following macros attach properties to the scope they are placed in. This
// assigns the property to the VP Intrinsic and/or SDNode that belongs to the
@@ -84,9 +83,9 @@ END_REGISTER_VP_SDNODE(SDOPC)
// Property Macros {
// The intrinsic and/or SDNode has the same function as this LLVM IR Opcode.
-// \p OPC The standard IR opcode.
-#ifndef HANDLE_VP_TO_OPC
-#define HANDLE_VP_TO_OPC(OPC)
+// \p OPC The opcode of the instruction with the same function.
+#ifndef VP_PROPERTY_FUNCTIONAL_OPC
+#define VP_PROPERTY_FUNCTIONAL_OPC(OPC)
#endif
// Whether the intrinsic may have a rounding mode or exception behavior operand
@@ -96,34 +95,30 @@ END_REGISTER_VP_SDNODE(SDOPC)
// \p HASEXCEPT '1' if the intrinsic can have an exception behavior operand
// bundle, '0' otherwise.
// \p INTRINID The constrained fp intrinsic this VP intrinsic corresponds to.
-#ifndef HANDLE_VP_TO_CONSTRAINEDFP
-#define HANDLE_VP_TO_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID)
+#ifndef VP_PROPERTY_CONSTRAINEDFP
+#define VP_PROPERTY_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID)
#endif
// Map this VP intrinsic to its canonical functional intrinsic.
-#ifndef HANDLE_VP_TO_INTRIN
-#define HANDLE_VP_TO_INTRIN(ID)
+// \p INTRIN The non-VP intrinsic with the same function.
+#ifndef VP_PROPERTY_FUNCTIONAL_INTRINSIC
+#define VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN)
#endif
// This VP Intrinsic is a memory operation
// The pointer arg is at POINTERPOS and the data arg is at DATAPOS.
-#ifndef HANDLE_VP_IS_MEMOP
-#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS)
+#ifndef VP_PROPERTY_MEMOP
+#define VP_PROPERTY_MEMOP(POINTERPOS, DATAPOS)
#endif
// Map this VP reduction intrinsic to its reduction operand positions.
-#ifndef HANDLE_VP_REDUCTION
-#define HANDLE_VP_REDUCTION(ID, STARTPOS, VECTORPOS)
+#ifndef VP_PROPERTY_REDUCTION
+#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS)
#endif
// A property to infer VP binary-op SDNode opcodes automatically.
-#ifndef PROPERTY_VP_BINARYOP_SDNODE
-#define PROPERTY_VP_BINARYOP_SDNODE(ID)
-#endif
-
-// A property to infer VP reduction SDNode opcodes automatically.
-#ifndef PROPERTY_VP_REDUCTION_SDNODE
-#define PROPERTY_VP_REDUCTION_SDNODE(ID)
+#ifndef VP_PROPERTY_BINARYOP
+#define VP_PROPERTY_BINARYOP
#endif
/// } Property Macros
@@ -132,15 +127,14 @@ END_REGISTER_VP_SDNODE(SDOPC)
// Specialized helper macro for integer binary operators (%x, %y, %mask, %evl).
#ifdef HELPER_REGISTER_BINARY_INT_VP
-#error "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!"
+#error \
+ "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!"
#endif
-#define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDOPC, OPC) \
-BEGIN_REGISTER_VP(INTRIN, 2, 3, SDOPC, -1) \
-HANDLE_VP_TO_OPC(OPC) \
-PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
-END_REGISTER_VP(INTRIN, SDOPC)
-
-
+#define HELPER_REGISTER_BINARY_INT_VP(VPID, VPSD, IROPC) \
+ BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, -1) \
+ VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
+ VP_PROPERTY_BINARYOP \
+ END_REGISTER_VP(VPID, VPSD)
// llvm.vp.add(x,y,mask,vlen)
HELPER_REGISTER_BINARY_INT_VP(vp_add, VP_ADD, Add)
@@ -193,12 +187,12 @@ HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor)
#error \
"The internal helper macro HELPER_REGISTER_BINARY_FP_VP is already defined!"
#endif
-#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, SDOPC, OPC) \
- BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, SDOPC, -1) \
- HANDLE_VP_TO_OPC(OPC) \
- HANDLE_VP_TO_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \
- PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
- END_REGISTER_VP(vp_##OPSUFFIX, SDOPC)
+#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, VPSD, IROPC) \
+ BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, VPSD, -1) \
+ VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
+ VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \
+ VP_PROPERTY_BINARYOP \
+ END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
// llvm.vp.fadd(x,y,mask,vlen)
HELPER_REGISTER_BINARY_FP_VP(fadd, VP_FADD, FAdd)
@@ -224,34 +218,34 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem)
BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3)
// chain = VP_STORE chain,val,base,offset,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_STORE, 0, vp_store, 4, 5)
-HANDLE_VP_TO_OPC(Store)
-HANDLE_VP_TO_INTRIN(masked_store)
-HANDLE_VP_IS_MEMOP(vp_store, 1, 0)
+VP_PROPERTY_FUNCTIONAL_OPC(Store)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_store)
+VP_PROPERTY_MEMOP(1, 0)
END_REGISTER_VP(vp_store, VP_STORE)
// llvm.vp.scatter(ptr,val,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3)
// chain = VP_SCATTER chain,val,base,indices,scale,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, -1, vp_scatter, 5, 6)
-HANDLE_VP_TO_INTRIN(masked_scatter)
-HANDLE_VP_IS_MEMOP(vp_scatter, 1, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_scatter)
+VP_PROPERTY_MEMOP(1, 0)
END_REGISTER_VP(vp_scatter, VP_SCATTER)
// llvm.vp.load(ptr,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_load, 1, 2)
// val,chain = VP_LOAD chain,base,offset,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_LOAD, -1, vp_load, 3, 4)
-HANDLE_VP_TO_OPC(Load)
-HANDLE_VP_TO_INTRIN(masked_load)
-HANDLE_VP_IS_MEMOP(vp_load, 0, None)
+VP_PROPERTY_FUNCTIONAL_OPC(Load)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load)
+VP_PROPERTY_MEMOP(0, None)
END_REGISTER_VP(vp_load, VP_LOAD)
// llvm.vp.gather(ptr,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2)
// val,chain = VP_GATHER chain,base,indices,scale,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5)
-HANDLE_VP_TO_INTRIN(masked_gather)
-HANDLE_VP_IS_MEMOP(vp_gather, 0, None)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_gather)
+VP_PROPERTY_MEMOP(0, None)
END_REGISTER_VP(vp_gather, VP_GATHER)
///// } Memory Operations
@@ -260,14 +254,14 @@ END_REGISTER_VP(vp_gather, VP_GATHER)
// Specialized helper macro for VP reductions (%start, %x, %mask, %evl).
#ifdef HELPER_REGISTER_REDUCTION_VP
-#error "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!"
+#error \
+ "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!"
#endif
-#define HELPER_REGISTER_REDUCTION_VP(VPINTRIN, SDOPC, INTRIN) \
-BEGIN_REGISTER_VP(VPINTRIN, 2, 3, SDOPC, -1) \
-HANDLE_VP_TO_INTRIN(INTRIN) \
-HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \
-PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
-END_REGISTER_VP(VPINTRIN, SDOPC)
+#define HELPER_REGISTER_REDUCTION_VP(VPID, VPSD, INTRIN) \
+ BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, -1) \
+ VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \
+ VP_PROPERTY_REDUCTION(0, 1) \
+ END_REGISTER_VP(VPID, VPSD)
// llvm.vp.reduce.add(start,x,mask,vlen)
HELPER_REGISTER_REDUCTION_VP(vp_reduce_add, VP_REDUCE_ADD,
@@ -320,19 +314,19 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN,
// fast-math flags in the IR and as two distinct ISD opcodes in the
// SelectionDAG.
#ifdef HELPER_REGISTER_REDUCTION_SEQ_VP
-#error "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!"
+#error \
+ "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!"
#endif
-#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPINTRIN, SDOPC, SEQ_SDOPC, INTRIN) \
-BEGIN_REGISTER_VP_INTRINSIC(VPINTRIN, 2, 3) \
-BEGIN_REGISTER_VP_SDNODE(SDOPC, -1, VPINTRIN, 2, 3) \
-END_REGISTER_VP_SDNODE(SDOPC) \
-BEGIN_REGISTER_VP_SDNODE(SEQ_SDOPC, -1, VPINTRIN, 2, 3) \
-END_REGISTER_VP_SDNODE(SEQ_SDOPC) \
-HANDLE_VP_TO_INTRIN(INTRIN) \
-HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \
-PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
-PROPERTY_VP_REDUCTION_SDNODE(SEQ_SDOPC) \
-END_REGISTER_VP_INTRINSIC(VPINTRIN)
+#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPID, VPSD, SEQ_VPSD, INTRIN) \
+ BEGIN_REGISTER_VP_INTRINSIC(VPID, 2, 3) \
+ BEGIN_REGISTER_VP_SDNODE(VPSD, -1, VPID, 2, 3) \
+ VP_PROPERTY_REDUCTION(0, 1) \
+ END_REGISTER_VP_SDNODE(VPSD) \
+ BEGIN_REGISTER_VP_SDNODE(SEQ_VPSD, -1, VPID, 2, 3) \
+ VP_PROPERTY_REDUCTION(0, 1) \
+ END_REGISTER_VP_SDNODE(SEQ_VPSD) \
+ VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \
+ END_REGISTER_VP_INTRINSIC(VPID)
// llvm.vp.reduce.fadd(start,x,mask,vlen)
HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fadd, VP_REDUCE_FADD,
@@ -356,8 +350,7 @@ BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3)
// END_REGISTER_CASES(vp_select, VP_SELECT)
END_REGISTER_VP_INTRINSIC(vp_select)
-BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5,
- EXPERIMENTAL_VP_SPLICE, -1)
+BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1)
END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
///// } Shuffles
@@ -368,10 +361,9 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
#undef END_REGISTER_VP
#undef END_REGISTER_VP_INTRINSIC
#undef END_REGISTER_VP_SDNODE
-#undef HANDLE_VP_TO_OPC
-#undef HANDLE_VP_TO_CONSTRAINEDFP
-#undef HANDLE_VP_TO_INTRIN
-#undef HANDLE_VP_IS_MEMOP
-#undef HANDLE_VP_REDUCTION
-#undef PROPERTY_VP_BINARYOP_SDNODE
-#undef PROPERTY_VP_REDUCTION_SDNODE
+#undef VP_PROPERTY_BINARYOP
+#undef VP_PROPERTY_CONSTRAINEDFP
+#undef VP_PROPERTY_FUNCTIONAL_INTRINSIC
+#undef VP_PROPERTY_FUNCTIONAL_OPC
+#undef VP_PROPERTY_MEMOP
+#undef VP_PROPERTY_REDUCTION
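Consumers drive this table by defining only the macros they care about before including the file; the #ifndef defaults and the trailing #undefs make everything else a no-op. A sketch of mapping a VP intrinsic to its functional IR opcode via the renamed property macro (returning 0 when no such opcode is registered):

    static unsigned getFunctionalOpcodeForVP(llvm::Intrinsic::ID ID) {
      switch (ID) {
      default:
        break;
    #define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS)                 \
      case llvm::Intrinsic::VPID:
    #define VP_PROPERTY_FUNCTIONAL_OPC(OPC) return llvm::Instruction::OPC;
    #define END_REGISTER_VP_INTRINSIC(VPID) break;
    #include "llvm/IR/VPIntrinsics.def"
      }
      return 0;
    }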
diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h
index f4381d2ae4a9..52a4c7b4301f 100644
--- a/llvm/include/llvm/IR/Verifier.h
+++ b/llvm/include/llvm/IR/Verifier.h
@@ -6,9 +6,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the function verifier interface, that can be used for some
-// sanity checking of input to the system, and for checking that transformations
-// haven't done something bad.
+// This file defines the function verifier interface, which can be used to
+// validate input to the system and to check that transformations haven't
+// done something bad.
//
// Note that this does not provide full 'java style' security and verifications,
// instead it just tries to ensure that code is well formed.
diff --git a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
index 2b0f391570cd..8c0ad2699b8d 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -264,9 +264,10 @@ public:
// Update the ready queues.
void dump() const;
- // This routine performs a sanity check. This routine should only be called
- // when we know that 'IR' is not in the scheduler's instruction queues.
- void sanityCheck(const InstRef &IR) const {
+ // This routine performs a basic correctness check. This routine should only
+ // be called when we know that 'IR' is not in the scheduler's instruction
+ // queues.
+ void instructionCheck(const InstRef &IR) const {
assert(!is_contained(WaitSet, IR) && "Already in the wait set!");
assert(!is_contained(ReadySet, IR) && "Already in the ready set!");
assert(!is_contained(IssuedSet, IR) && "Already executing!");
diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h
index ee89f4eac61f..38a7de3d6131 100644
--- a/llvm/include/llvm/ObjectYAML/MachOYAML.h
+++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h
@@ -121,6 +121,7 @@ struct LinkEditData {
MachOYAML::ExportEntry ExportTrie;
std::vector<NListEntry> NameList;
std::vector<StringRef> StringTable;
+ std::vector<yaml::Hex32> IndirectSymbols;
bool isEmpty() const;
};
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc
new file mode 100644
index 000000000000..d64227e4ba31
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -0,0 +1,61 @@
+#ifndef MEMPROF_DATA_INC
+#define MEMPROF_DATA_INC
+/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-===*\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+/*
+ * This is the main file that defines the data structures, signatures, and
+ * constant literals shared between the profiling runtime library and the
+ * host tools (reader/writer).
+ *
+ * This file has two identical copies. The primary copy lives in LLVM and
+ * the other sits in the compiler-rt/include/profile directory. To make
+ * changes, first modify the primary copy and copy it over to compiler-rt.
+ * Testing of any change can start only after the two copies are synced up.
+ *
+\*===----------------------------------------------------------------------===*/
+
+
+#ifdef _MSC_VER
+#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop))
+#else
+#define PACKED(__decl__) __decl__ __attribute__((__packed__))
+#endif
+
+// A 64-bit magic number to uniquely identify the raw binary memprof profile file.
+#define MEMPROF_RAW_MAGIC_64 \
+ ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \
+ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
+
+// The version number of the raw binary format.
+#define MEMPROF_RAW_VERSION 1ULL
+
+namespace llvm {
+namespace memprof {
+// A struct describing the header used for the raw binary memprof profile format.
+PACKED(struct Header {
+ uint64_t Magic;
+ uint64_t Version;
+ uint64_t TotalSize;
+ uint64_t SegmentOffset;
+ uint64_t MIBOffset;
+ uint64_t StackOffset;
+});
+
+// A struct describing a /proc/maps segment entry for a particular
+// binary/library, identified by its build id.
+PACKED(struct SegmentEntry {
+ uint64_t Start;
+ uint64_t End;
+ uint64_t Offset;
+ uint8_t BuildId[32];
+});
+} // namespace memprof
+} // namespace llvm
+
+#endif
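A hedged sketch of how a reader might validate a Header parsed from the start of a raw profile buffer (the helper name is illustrative, not part of this file):

    #include <cstdint>
    #include "llvm/ProfileData/MemProfData.inc"

    static bool isSupportedRawProfile(const llvm::memprof::Header &H) {
      // The magic fingerprints the format; the version gates reader support.
      return H.Magic == MEMPROF_RAW_MAGIC_64 &&
             H.Version <= MEMPROF_RAW_VERSION;
    }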
diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
new file mode 100644
index 000000000000..45544927a86f
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -0,0 +1,43 @@
+#ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
+#define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
+//===- RawMemProfReader.h - Instrumented memory profiling reader -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading MemProf profiling data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+namespace memprof {
+
+class RawMemProfReader {
+public:
+ RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
+ : DataBuffer(std::move(DataBuffer)) {}
+ // Prints aggregate counts for each raw profile parsed from the DataBuffer.
+ void printSummaries(raw_ostream &OS) const;
+
+ // Return true if the \p DataBuffer starts with magic bytes indicating it is
+ // a raw binary memprof profile.
+ static bool hasFormat(const MemoryBuffer &DataBuffer);
+
+ // Create a RawMemProfReader after validating the contents of the file at
+ // \p Path.
+ static Expected<std::unique_ptr<RawMemProfReader>> create(const Twine &Path);
+
+private:
+ std::unique_ptr<MemoryBuffer> DataBuffer;
+};
+
+} // namespace memprof
+} // namespace llvm
+
+#endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
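A minimal usage sketch, assuming Path names a raw memprof profile on disk:

    auto ReaderOr = llvm::memprof::RawMemProfReader::create(Path);
    if (!ReaderOr)
      llvm::logAllUnhandledErrors(ReaderOr.takeError(), llvm::errs());
    else
      (*ReaderOr)->printSummaries(llvm::outs());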
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index b3cfb71601f1..48e82fa55a0f 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -181,7 +181,8 @@ AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false,
(AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM |
AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML |
- AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16))
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+ AArch64::AEK_BF16))
AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_LSE))
AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h
index 131a58412db6..15bb428f19bc 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.h
+++ b/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -137,15 +137,6 @@ void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
bool isX18ReservedByDefault(const Triple &TT);
-struct ParsedBranchProtection {
- StringRef Scope;
- StringRef Key;
- bool BranchTargetEnforcement;
-};
-
-bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
- StringRef &Err);
-
} // namespace AArch64
} // namespace llvm
diff --git a/llvm/include/llvm/Support/ARMAttributeParser.h b/llvm/include/llvm/Support/ARMAttributeParser.h
index 5d12b7e08d58..b46a4d9f690f 100644
--- a/llvm/include/llvm/Support/ARMAttributeParser.h
+++ b/llvm/include/llvm/Support/ARMAttributeParser.h
@@ -67,6 +67,10 @@ class ARMAttributeParser : public ELFAttributeParser {
Error DSP_extension(ARMBuildAttrs::AttrType tag);
Error T2EE_use(ARMBuildAttrs::AttrType tag);
Error Virtualization_use(ARMBuildAttrs::AttrType tag);
+ Error PAC_extension(ARMBuildAttrs::AttrType tag);
+ Error BTI_extension(ARMBuildAttrs::AttrType tag);
+ Error PACRET_use(ARMBuildAttrs::AttrType tag);
+ Error BTI_use(ARMBuildAttrs::AttrType tag);
Error nodefaults(ARMBuildAttrs::AttrType tag);
public:
diff --git a/llvm/include/llvm/Support/ARMBuildAttributes.h b/llvm/include/llvm/Support/ARMBuildAttributes.h
index 37c37522fd26..b4405e7d4908 100644
--- a/llvm/include/llvm/Support/ARMBuildAttributes.h
+++ b/llvm/include/llvm/Support/ARMBuildAttributes.h
@@ -70,9 +70,13 @@ enum AttrType : unsigned {
DIV_use = 44,
DSP_extension = 46,
MVE_arch = 48,
+ PAC_extension = 50,
+ BTI_extension = 52,
also_compatible_with = 65,
conformance = 67,
Virtualization_use = 68,
+ BTI_use = 74,
+ PACRET_use = 76,
/// Legacy Tags
Section = 2, // deprecated (ABI r2.09)
@@ -237,7 +241,25 @@ enum {
// Tag_Virtualization_use, (=68), uleb128
AllowTZ = 1,
AllowVirtualization = 2,
- AllowTZVirtualization = 3
+ AllowTZVirtualization = 3,
+
+ // Tag_PAC_extension, (=50), uleb128
+ DisallowPAC = 0,
+ AllowPACInNOPSpace = 1,
+ AllowPAC = 2,
+
+ // Tag_BTI_extension, (=52), uleb128
+ DisallowBTI = 0,
+ AllowBTIInNOPSpace = 1,
+ AllowBTI = 2,
+
+ // Tag_BTI_use, (=74), uleb128
+ BTINotUsed = 0,
+ BTIUsed = 1,
+
+ // Tag_PACRET_use, (=76), uleb128
+ PACRETNotUsed = 0,
+ PACRETUsed = 1
};
} // namespace ARMBuildAttrs
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index fd08f3e6960c..7d29808f0501 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -201,6 +201,7 @@ ARM_ARCH_EXT_NAME("cdecp4", ARM::AEK_CDECP4, "+cdecp4", "-cdecp4")
ARM_ARCH_EXT_NAME("cdecp5", ARM::AEK_CDECP5, "+cdecp5", "-cdecp5")
ARM_ARCH_EXT_NAME("cdecp6", ARM::AEK_CDECP6, "+cdecp6", "-cdecp6")
ARM_ARCH_EXT_NAME("cdecp7", ARM::AEK_CDECP7, "+cdecp7", "-cdecp7")
+ARM_ARCH_EXT_NAME("pacbti", ARM::AEK_PACBTI, "+pacbti", "-pacbti")
#undef ARM_ARCH_EXT_NAME
#ifndef ARM_HW_DIV_NAME
diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h
index b1ffcfb34552..b40704c24e87 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.h
+++ b/llvm/include/llvm/Support/ARMTargetParser.h
@@ -59,7 +59,7 @@ enum ArchExtKind : uint64_t {
AEK_CDECP5 = 1 << 27,
AEK_CDECP6 = 1 << 28,
AEK_CDECP7 = 1 << 29,
-
+ AEK_PACBTI = 1 << 30,
// Unsupported extensions.
AEK_OS = 1ULL << 59,
AEK_IWMMXT = 1ULL << 60,
diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h
index 21fd50763b1f..f39400c26eab 100644
--- a/llvm/include/llvm/Support/GenericDomTree.h
+++ b/llvm/include/llvm/Support/GenericDomTree.h
@@ -528,9 +528,9 @@ protected:
/// of CFG edges must not delete the CFG nodes before calling this function.
///
/// The applyUpdates function can reorder the updates and remove redundant
- /// ones internally. The batch updater is also able to detect sequences of
- /// zero and exactly one update -- it's optimized to do less work in these
- /// cases.
+ /// ones internally (as long as it is done in a deterministic fashion). The
+ /// batch updater is also able to detect sequences of zero and exactly one
+ /// update -- it's optimized to do less work in these cases.
///
/// Note that for postdominators it automatically takes care of applying
/// updates on reverse edges internally (so there's no need to swap the
@@ -538,8 +538,8 @@ protected:
/// The type of updates is the same for DomTreeBase<T> and PostDomTreeBase<T>
/// with the same template parameter T.
///
- /// \param Updates An unordered sequence of updates to perform. The current
- /// CFG and the reverse of these updates provides the pre-view of the CFG.
+ /// \param Updates An ordered sequence of updates to perform. The current CFG
+ /// and the reverse of these updates provides the pre-view of the CFG.
///
void applyUpdates(ArrayRef<UpdateType> Updates) {
GraphDiff<NodePtr, IsPostDominator> PreViewCFG(
@@ -547,9 +547,9 @@ protected:
DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, nullptr);
}
- /// \param Updates An unordered sequence of updates to perform. The current
- /// CFG and the reverse of these updates provides the pre-view of the CFG.
- /// \param PostViewUpdates An unordered sequence of update to perform in order
+ /// \param Updates An ordered sequence of updates to perform. The current CFG
+ /// and the reverse of these updates provides the pre-view of the CFG.
+ /// \param PostViewUpdates An ordered sequence of updates to perform in order
/// to obtain a post-view of the CFG. The DT will be updated assuming the
/// obtained PostViewCFG is the desired end state.
void applyUpdates(ArrayRef<UpdateType> Updates,
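With the sequence now treated as ordered, callers should submit updates in the order the CFG edges were actually changed. A minimal sketch, assuming DT is an llvm::DominatorTree and an edge from block A to block B was just inserted:

    llvm::SmallVector<llvm::DominatorTree::UpdateType, 4> Updates;
    Updates.push_back({llvm::DominatorTree::Insert, A, B});
    DT.applyUpdates(Updates);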
diff --git a/llvm/include/llvm/Support/HTTPClient.h b/llvm/include/llvm/Support/HTTPClient.h
new file mode 100644
index 000000000000..3172610c2d8b
--- /dev/null
+++ b/llvm/include/llvm/Support/HTTPClient.h
@@ -0,0 +1,113 @@
+//===-- llvm/Support/HTTPClient.h - HTTP client library ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declarations of the HTTPClient, HTTPMethod,
+/// HTTPResponseHandler, and BufferedHTTPResponseHandler classes, as well as
+/// the HTTPResponseBuffer and HTTPRequest structs.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_HTTP_CLIENT_H
+#define LLVM_SUPPORT_HTTP_CLIENT_H
+
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+
+enum class HTTPMethod { GET };
+
+/// A stateless description of an outbound HTTP request.
+struct HTTPRequest {
+ SmallString<128> Url;
+ HTTPMethod Method = HTTPMethod::GET;
+ bool FollowRedirects = true;
+ HTTPRequest(StringRef Url);
+};
+
+bool operator==(const HTTPRequest &A, const HTTPRequest &B);
+
+/// A handler for state updates occurring while an HTTPRequest is performed.
+/// Can trigger the client to abort the request by returning an Error from any
+/// of its methods.
+class HTTPResponseHandler {
+public:
+ /// Processes one line of HTTP response headers.
+ virtual Error handleHeaderLine(StringRef HeaderLine) = 0;
+
+ /// Processes an additional chunk of bytes of the HTTP response body.
+ virtual Error handleBodyChunk(StringRef BodyChunk) = 0;
+
+ /// Processes the HTTP response status code.
+ virtual Error handleStatusCode(unsigned Code) = 0;
+
+protected:
+ ~HTTPResponseHandler();
+};
+
+/// An HTTP response status code bundled with a buffer to store the body.
+struct HTTPResponseBuffer {
+ unsigned Code = 0;
+ std::unique_ptr<WritableMemoryBuffer> Body;
+};
+
+/// A simple handler which writes returned data to an HTTPResponseBuffer.
+/// Ignores all headers except the Content-Length, which it uses to
+/// allocate an appropriately-sized Body buffer.
+class BufferedHTTPResponseHandler final : public HTTPResponseHandler {
+ size_t Offset = 0;
+
+public:
+ /// Stores the data received from the HTTP server.
+ HTTPResponseBuffer ResponseBuffer;
+
+ /// These callbacks store the body and status code in an HTTPResponseBuffer
+ /// allocated based on Content-Length. The Content-Length header must be
+ /// handled by handleHeaderLine before any calls to handleBodyChunk.
+ Error handleHeaderLine(StringRef HeaderLine) override;
+ Error handleBodyChunk(StringRef BodyChunk) override;
+ Error handleStatusCode(unsigned Code) override;
+};
+
+/// A reusable client that can perform HTTPRequests through a network socket.
+class HTTPClient {
+public:
+ HTTPClient();
+ ~HTTPClient();
+
+ /// Returns true only if LLVM has been compiled with a working HTTPClient.
+ static bool isAvailable();
+
+ /// Must be called at the beginning of a program, while it is still
+ /// single-threaded.
+ static void initialize();
+
+ /// Must be called at the end of a program, once it is single-threaded
+ /// again.
+ static void cleanup();
+
+ /// Sets the timeout for the entire request, in milliseconds. A zero or
+ /// negative value means the request never times out.
+ void setTimeout(std::chrono::milliseconds Timeout);
+
+ /// Performs the Request, passing response data to the Handler. Returns all
+ /// errors which occur during the request. Aborts if an error is returned by a
+ /// Handler method.
+ Error perform(const HTTPRequest &Request, HTTPResponseHandler &Handler);
+
+ /// Performs the Request with the default BufferedHTTPResponseHandler, and
+ /// returns its HTTPResponseBuffer or an Error.
+ Expected<HTTPResponseBuffer> perform(const HTTPRequest &Request);
+
+ /// Performs an HTTPRequest with the default configuration to make a GET
+ /// request to the given Url. Returns an HTTPResponseBuffer or an Error.
+ Expected<HTTPResponseBuffer> get(StringRef Url);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_HTTP_CLIENT_H
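A hedged end-to-end sketch; the URL is a placeholder, and initialize/cleanup bracket the program because they are not thread-safe:

    llvm::HTTPClient::initialize();
    if (llvm::HTTPClient::isAvailable()) {
      llvm::HTTPClient Client;
      Client.setTimeout(std::chrono::milliseconds(5000));
      llvm::Expected<llvm::HTTPResponseBuffer> R =
          Client.get("https://example.com/artifact");
      if (R)
        llvm::outs() << "status " << R->Code << "\n";
      else
        llvm::logAllUnhandledErrors(R.takeError(), llvm::errs());
    }
    llvm::HTTPClient::cleanup();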
diff --git a/llvm/include/llvm/Support/Mutex.h b/llvm/include/llvm/Support/Mutex.h
index 1d8a0d3c87cb..d73bb8ef1120 100644
--- a/llvm/include/llvm/Support/Mutex.h
+++ b/llvm/include/llvm/Support/Mutex.h
@@ -36,7 +36,7 @@ namespace llvm
return true;
} else {
// Single-threaded debugging code. This would be racy in
- // multithreaded mode, but provides not sanity checks in single
+ // multithreaded mode, but provides basic checks in single
// threaded mode.
++acquired;
return true;
@@ -49,7 +49,7 @@ namespace llvm
return true;
} else {
// Single-threaded debugging code. This would be racy in
- // multithreaded mode, but provides not sanity checks in single
+ // multithreaded mode, but provides basic checks in single
// threaded mode.
assert(acquired && "Lock not acquired before release!");
--acquired;
diff --git a/llvm/include/llvm/Support/RWMutex.h b/llvm/include/llvm/Support/RWMutex.h
index 150bc7dbbce1..33a5d3efffee 100644
--- a/llvm/include/llvm/Support/RWMutex.h
+++ b/llvm/include/llvm/Support/RWMutex.h
@@ -114,7 +114,7 @@ public:
}
// Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
+ // mode, but provides basic checks in single threaded mode.
++readers;
return true;
}
@@ -126,7 +126,7 @@ public:
}
// Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
+ // mode, but provides basic checks in single threaded mode.
assert(readers > 0 && "Reader lock not acquired before release!");
--readers;
return true;
@@ -139,7 +139,7 @@ public:
}
// Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
+ // mode, but provides basic checks in single threaded mode.
assert(writers == 0 && "Writer lock already acquired!");
++writers;
return true;
@@ -152,7 +152,7 @@ public:
}
// Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
+ // mode, but provides basic checks in single threaded mode.
assert(writers == 1 && "Writer lock not acquired before release!");
--writers;
return true;
diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h
index 366dd3cf55c6..b11467dcce28 100644
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/include/llvm/Support/TargetParser.h
@@ -177,6 +177,18 @@ StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64);
} // namespace RISCV
+namespace ARM {
+struct ParsedBranchProtection {
+ StringRef Scope;
+ StringRef Key;
+ bool BranchTargetEnforcement;
+};
+
+bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
+ StringRef &Err);
+
+} // namespace ARM
+
} // namespace llvm
#endif
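A hedged sketch of the relocated API; "standard" is one of the -mbranch-protection spec strings:

    llvm::ARM::ParsedBranchProtection PBP;
    llvm::StringRef Err;
    if (llvm::ARM::parseBranchProtection("standard", PBP, Err))
      llvm::outs() << "scope=" << PBP.Scope << " key=" << PBP.Key << " bti="
                   << PBP.BranchTargetEnforcement << "\n";
    else
      llvm::errs() << "invalid branch-protection spec: " << Err << "\n";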
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
index 4c41b88d6043..8d30e8e92755 100644
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -36,9 +36,6 @@ namespace llvm {
/// for some work to become available.
class ThreadPool {
public:
- using TaskTy = std::function<void()>;
- using PackagedTaskTy = std::packaged_task<void()>;
-
/// Construct a pool using the hardware strategy \p S for mapping hardware
/// execution resources (threads, cores, CPUs)
/// Defaults to using the maximum execution resources in the system, but
@@ -51,17 +48,17 @@ public:
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
template <typename Function, typename... Args>
- inline std::shared_future<void> async(Function &&F, Args &&... ArgList) {
+ inline auto async(Function &&F, Args &&...ArgList) {
auto Task =
std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...);
- return asyncImpl(std::move(Task));
+ return async(std::move(Task));
}
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
- template <typename Function>
- inline std::shared_future<void> async(Function &&F) {
- return asyncImpl(std::forward<Function>(F));
+ template <typename Func>
+ auto async(Func &&F) -> std::shared_future<decltype(F())> {
+ return asyncImpl(std::function<decltype(F())()>(std::forward<Func>(F)));
}
/// Blocking wait for all the threads to complete and the queue to be empty.
@@ -74,17 +71,70 @@ public:
bool isWorkerThread() const;
private:
+ /// Helpers to create a promise and a callable wrapper of \p Task that sets
+ /// the result of the promise. Returns the callable and a future to access the
+ /// result.
+ template <typename ResTy>
+ static std::pair<std::function<void()>, std::future<ResTy>>
+ createTaskAndFuture(std::function<ResTy()> Task) {
+ std::shared_ptr<std::promise<ResTy>> Promise =
+ std::make_shared<std::promise<ResTy>>();
+ auto F = Promise->get_future();
+ return {
+ [Promise = std::move(Promise), Task]() { Promise->set_value(Task()); },
+ std::move(F)};
+ }
+ static std::pair<std::function<void()>, std::future<void>>
+ createTaskAndFuture(std::function<void()> Task) {
+ std::shared_ptr<std::promise<void>> Promise =
+ std::make_shared<std::promise<void>>();
+ auto F = Promise->get_future();
+ return {[Promise = std::move(Promise), Task]() {
+ Task();
+ Promise->set_value();
+ },
+ std::move(F)};
+ }
+
bool workCompletedUnlocked() { return !ActiveThreads && Tasks.empty(); }
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
- std::shared_future<void> asyncImpl(TaskTy F);
+ template <typename ResTy>
+ std::shared_future<ResTy> asyncImpl(std::function<ResTy()> Task) {
+
+#if LLVM_ENABLE_THREADS
+ // Wrap the Task in a std::function<void()> that sets the result of the
+ // corresponding future.
+ auto R = createTaskAndFuture(Task);
+
+ {
+ // Lock the queue and push the new task
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+
+ // Don't allow enqueueing after disabling the pool
+ assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
+ Tasks.push(std::move(R.first));
+ }
+ QueueCondition.notify_one();
+ return R.second.share();
+
+#else // LLVM_ENABLE_THREADS Disabled
+
+ // Get a Future with launch::deferred execution using std::async
+ auto Future = std::async(std::launch::deferred, std::move(Task)).share();
+ // Wrap the future so that both ThreadPool::wait() can operate and the
+ // returned future can be sync'ed on.
+ Tasks.push([Future]() { Future.get(); });
+ return Future;
+#endif
+ }
/// Threads in flight
std::vector<llvm::thread> Threads;
/// Tasks waiting for execution in the pool.
- std::queue<PackagedTaskTy> Tasks;
+ std::queue<std::function<void()>> Tasks;
/// Locking and signaling for accessing the Tasks queue.
std::mutex QueueLock;
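With the templated asyncImpl, the returned future now carries the callable's result type instead of always being void. A small sketch:

    llvm::ThreadPool Pool;
    std::shared_future<int> F = Pool.async([] { return 6 * 7; });
    std::shared_future<void> G = Pool.async([] { /* side effects only */ });
    int Answer = F.get(); // blocks until the task has run; yields 42
    Pool.wait();          // drain any remaining work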
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e2d3dbdda88a..1d189c6dea6d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -645,6 +645,13 @@ def extract_vec_elt_combines : GICombineGroup<[
extract_vec_elt_build_vec,
extract_all_elts_from_build_vector]>;
+def funnel_shift_from_or_shift : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_OR):$root,
+ [{ return Helper.matchOrShiftToFunnelShift(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
def funnel_shift_to_rotate : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_FSHL, G_FSHR):$root,
@@ -683,7 +690,8 @@ def bitfield_extract_from_and : GICombineRule<
[{ return Helper.matchBitfieldExtractFromAnd(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
-def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>;
+def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
+ funnel_shift_to_rotate]>;
def bitfield_extract_from_sext_inreg : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
@@ -751,6 +759,84 @@ def redundant_neg_operands: GICombineRule<
[{ return Helper.matchRedundantNegOperands(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+// Transform (fadd x, (fmul y, z)) -> (fma y, z, x)
+// (fadd x, (fmul y, z)) -> (fmad y, z, x)
+// Transform (fadd (fmul x, y), z) -> (fma x, y, z)
+// (fadd (fmul x, y), z) -> (fmad x, y, z)
+def combine_fadd_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FADD):$root,
+ [{ return Helper.matchCombineFAddFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+// -> (fmad (fpext x), (fpext y), z)
+// Transform (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+// -> (fmad (fpext y), (fpext z), x)
+def combine_fadd_fpext_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FADD):$root,
+ [{ return Helper.matchCombineFAddFpExtFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fadd (fma x, y, (fmul z, u)), v) -> (fma x, y, (fma z, u, v))
+// (fadd (fmad x, y, (fmul z, u)), v) -> (fmad x, y, (fmad z, u, v))
+// Transform (fadd v, (fma x, y, (fmul z, u))) -> (fma x, y, (fma z, u, v))
+// (fadd v, (fmad x, y, (fmul z, u))) -> (fmad x, y, (fmad z, u, v))
+def combine_fadd_fma_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FADD):$root,
+ [{ return Helper.matchCombineFAddFMAFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fadd (fma x, y, (fpext (fmul u, v))), z) ->
+// (fma x, y, (fma (fpext u), (fpext v), z))
+def combine_fadd_fpext_fma_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FADD):$root,
+ [{ return Helper.matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
+ *${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fsub (fmul x, y), z) -> (fma x, y, -z)
+// -> (fmad x, y, -z)
+def combine_fsub_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FSUB):$root,
+ [{ return Helper.matchCombineFSubFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+// (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
+def combine_fsub_fneg_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FSUB):$root,
+ [{ return Helper.matchCombineFSubFNegFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fsub (fpext (fmul x, y)), z) ->
+// (fma (fpext x), (fpext y), (fneg z))
+def combine_fsub_fpext_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FSUB):$root,
+ [{ return Helper.matchCombineFSubFpExtFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fsub (fneg (fpext (fmul x, y))), z) ->
+// (fneg (fma (fpext x), (fpext y), z))
+def combine_fsub_fpext_fneg_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FSUB):$root,
+ [{ return Helper.matchCombineFSubFpExtFNegFMulToFMadOrFMA(
+ *${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -783,6 +869,12 @@ def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one]>;
+def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
+ combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
+ combine_fadd_fpext_fma_fmul_to_fmad_or_fma, combine_fsub_fmul_to_fmad_or_fma,
+ combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma,
+ combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>;
+
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
@@ -799,7 +891,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
form_bitfield_extract, constant_fold, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
- and_or_disjoint_mask ]>;
+ and_or_disjoint_mask, fma_combines]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
index 6e45f8f6fb05..429fcbd81b45 100644
--- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
+++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
@@ -24,22 +24,47 @@ using namespace sampleprof;
namespace llvm {
namespace sampleprof {
+struct ProfiledCallGraphNode;
+
+struct ProfiledCallGraphEdge {
+ ProfiledCallGraphEdge(ProfiledCallGraphNode *Source,
+ ProfiledCallGraphNode *Target, uint64_t Weight)
+ : Source(Source), Target(Target), Weight(Weight) {}
+ ProfiledCallGraphNode *Source;
+ ProfiledCallGraphNode *Target;
+ uint64_t Weight;
+
+ // The call destination is the only important data here, so allow
+ // transparent unwrapping into it.
+ operator ProfiledCallGraphNode *() const { return Target; }
+};
+
struct ProfiledCallGraphNode {
- ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {}
- StringRef Name;
- struct ProfiledCallGraphNodeComparer {
- bool operator()(const ProfiledCallGraphNode *L,
- const ProfiledCallGraphNode *R) const {
- return L->Name < R->Name;
+ // Sort edges by callee name only, since all edges being compared come
+ // from the same caller. Edge weights are not considered either because,
+ // for the same callee, only the edge with the largest weight is added to
+ // the edge set.
+ struct ProfiledCallGraphEdgeComparer {
+ bool operator()(const ProfiledCallGraphEdge &L,
+ const ProfiledCallGraphEdge &R) const {
+ return L.Target->Name < R.Target->Name;
}
};
- std::set<ProfiledCallGraphNode *, ProfiledCallGraphNodeComparer> Callees;
+
+ using iterator = std::set<ProfiledCallGraphEdge>::iterator;
+ using const_iterator = std::set<ProfiledCallGraphEdge>::const_iterator;
+ using edge = ProfiledCallGraphEdge;
+ using edges = std::set<ProfiledCallGraphEdge, ProfiledCallGraphEdgeComparer>;
+
+ ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {}
+
+ StringRef Name;
+ edges Edges;
};
class ProfiledCallGraph {
public:
- using iterator = std::set<ProfiledCallGraphNode *>::iterator;
+ using iterator = std::set<ProfiledCallGraphEdge>::iterator;
// Constructor for non-CS profile.
ProfiledCallGraph(SampleProfileMap &ProfileMap) {
@@ -63,8 +88,9 @@ public:
while (!Queue.empty()) {
ContextTrieNode *Caller = Queue.front();
Queue.pop();
- // Add calls for context. When AddNodeWithSamplesOnly is true, both caller
- // and callee need to have context profile.
+ FunctionSamples *CallerSamples = Caller->getFunctionSamples();
+
+ // Add calls for context.
// Note that callsite target samples are completely ignored since they can
// conflict with the context edges, which are formed by context
// compression during profile generation, for cyclic SCCs. This may
@@ -74,31 +100,61 @@ public:
ContextTrieNode *Callee = &Child.second;
addProfiledFunction(ContextTracker.getFuncNameFor(Callee));
Queue.push(Callee);
+
+ // Fetch edge weight from the profile.
+ uint64_t Weight;
+ FunctionSamples *CalleeSamples = Callee->getFunctionSamples();
+ if (!CalleeSamples || !CallerSamples) {
+ Weight = 0;
+ } else {
+ uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples();
+ uint64_t CallsiteCount = 0;
+ LineLocation Callsite = Callee->getCallSiteLoc();
+ if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
+ SampleRecord::CallTargetMap &TargetCounts = CallTargets.get();
+ auto It = TargetCounts.find(CalleeSamples->getName());
+ if (It != TargetCounts.end())
+ CallsiteCount = It->second;
+ }
+ Weight = std::max(CallsiteCount, CalleeEntryCount);
+ }
+
addProfiledCall(ContextTracker.getFuncNameFor(Caller),
- ContextTracker.getFuncNameFor(Callee));
+ ContextTracker.getFuncNameFor(Callee), Weight);
}
}
}
- iterator begin() { return Root.Callees.begin(); }
- iterator end() { return Root.Callees.end(); }
+ iterator begin() { return Root.Edges.begin(); }
+ iterator end() { return Root.Edges.end(); }
ProfiledCallGraphNode *getEntryNode() { return &Root; }
void addProfiledFunction(StringRef Name) {
if (!ProfiledFunctions.count(Name)) {
// Link to synthetic root to make sure every node is reachable
// from root. This does not affect SCC order.
ProfiledFunctions[Name] = ProfiledCallGraphNode(Name);
- Root.Callees.insert(&ProfiledFunctions[Name]);
+ Root.Edges.emplace(&Root, &ProfiledFunctions[Name], 0);
}
}
- void addProfiledCall(StringRef CallerName, StringRef CalleeName) {
+private:
+ void addProfiledCall(StringRef CallerName, StringRef CalleeName,
+ uint64_t Weight = 0) {
assert(ProfiledFunctions.count(CallerName));
auto CalleeIt = ProfiledFunctions.find(CalleeName);
- if (CalleeIt == ProfiledFunctions.end()) {
+ if (CalleeIt == ProfiledFunctions.end())
return;
+ ProfiledCallGraphEdge Edge(&ProfiledFunctions[CallerName],
+ &CalleeIt->second, Weight);
+ auto &Edges = ProfiledFunctions[CallerName].Edges;
+ auto EdgeIt = Edges.find(Edge);
+ if (EdgeIt == Edges.end()) {
+ Edges.insert(Edge);
+ } else if (EdgeIt->Weight < Edge.Weight) {
+ // Replace existing call edges with same target but smaller weight.
+ Edges.erase(EdgeIt);
+ Edges.insert(Edge);
}
- ProfiledFunctions[CallerName].Callees.insert(&CalleeIt->second);
}
void addProfiledCalls(const FunctionSamples &Samples) {
@@ -107,20 +163,20 @@ public:
for (const auto &Sample : Samples.getBodySamples()) {
for (const auto &Target : Sample.second.getCallTargets()) {
addProfiledFunction(Target.first());
- addProfiledCall(Samples.getFuncName(), Target.first());
+ addProfiledCall(Samples.getFuncName(), Target.first(), Target.second);
}
}
for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
for (const auto &InlinedSamples : CallsiteSamples.second) {
addProfiledFunction(InlinedSamples.first);
- addProfiledCall(Samples.getFuncName(), InlinedSamples.first);
+ addProfiledCall(Samples.getFuncName(), InlinedSamples.first,
+ InlinedSamples.second.getEntrySamples());
addProfiledCalls(InlinedSamples.second);
}
}
}
-private:
ProfiledCallGraphNode Root;
StringMap<ProfiledCallGraphNode> ProfiledFunctions;
};
@@ -128,12 +184,14 @@ private:
} // end namespace sampleprof
template <> struct GraphTraits<ProfiledCallGraphNode *> {
+ using NodeType = ProfiledCallGraphNode;
using NodeRef = ProfiledCallGraphNode *;
- using ChildIteratorType = std::set<ProfiledCallGraphNode *>::iterator;
+ using EdgeType = NodeType::edge;
+ using ChildIteratorType = NodeType::const_iterator;
static NodeRef getEntryNode(NodeRef PCGN) { return PCGN; }
- static ChildIteratorType child_begin(NodeRef N) { return N->Callees.begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->Callees.end(); }
+ static ChildIteratorType child_begin(NodeRef N) { return N->Edges.begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->Edges.end(); }
};
template <>
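
As a usage sketch (editorial, not part of the patch): the GraphTraits specializations above let generic graph algorithms consume the now-weighted graph. The snippet below mirrors how the sample loader derives a bottom-up function order, assuming a populated SampleProfileMap and the companion GraphTraits<ProfiledCallGraph *> specialization begun above:

#include "llvm/ADT/SCCIterator.h"
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"

using namespace llvm;
using namespace llvm::sampleprof;

// Visit strongly connected components of the profiled call graph in
// bottom-up order; each SCC is a vector of ProfiledCallGraphNode pointers.
static void visitProfiledGraph(SampleProfileMap &ProfileMap) {
  ProfiledCallGraph CG(ProfileMap);
  for (auto I = scc_begin(&CG); !I.isAtEnd(); ++I)
    for (ProfiledCallGraphNode *Node : *I)
      (void)Node->Name; // e.g. append the name to a processing order
}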
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
index c13407a44091..6002f0270083 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -141,7 +141,7 @@ private:
AsanDtorKind DestructorKind;
};
-// Insert AddressSanitizer (address sanity checking) instrumentation
+// Insert AddressSanitizer (address basic correctness checking) instrumentation
FunctionPass *createAddressSanitizerFunctionPass(
bool CompileKernel = false, bool Recover = false,
bool UseAfterScope = false,
diff --git a/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h
index d76b55babc74..45983ad9d571 100644
--- a/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h
+++ b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h
@@ -22,6 +22,7 @@ class Function;
struct AnnotationRemarksPass : public PassInfoMixin<AnnotationRemarksPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // namespace llvm
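
For context, isRequired() is the new-pass-manager hook that keeps a pass from being skipped (for example, for optnone functions or under bypass options); returning true here guarantees the remarks pass always runs. A minimal sketch of the same pattern, with a hypothetical pass name:

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical inspect-only pass: isRequired() prevents it from being
// skipped, and PreservedAnalyses::all() records that nothing is modified.
struct MyRemarkPass : PassInfoMixin<MyRemarkPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    return PreservedAnalyses::all();
  }
  static bool isRequired() { return true; }
};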
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 72cb606eb51a..3c529abce85a 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -55,7 +55,6 @@ class MDNode;
class MemorySSAUpdater;
class PHINode;
class StoreInst;
-class SwitchInst;
class TargetLibraryInfo;
class TargetTransformInfo;
@@ -238,10 +237,6 @@ CallInst *createCallMatchingInvoke(InvokeInst *II);
/// This function converts the specified invoke into a normal call.
void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr);
-/// This function removes the default destination from the specified switch.
-void createUnreachableSwitchDefault(SwitchInst *Switch,
- DomTreeUpdater *DTU = nullptr);
-
///===---------------------------------------------------------------------===//
/// Dbg Intrinsic utilities
///
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
index 22b2295cc9d7..c233e3dc168e 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -169,6 +169,10 @@ public:
/// Called to update debug info associated with the instruction.
virtual void updateDebugInfo(Instruction *I) const {}
+
+ /// Return false if a sub-class wants to keep one of the loads/stores
+ /// after the SSA construction.
+ virtual bool shouldDelete(Instruction *I) const { return true; }
};
} // end namespace llvm
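
A minimal sketch (illustrative; the subclass name is invented) of a LoadAndStorePromoter client using the new hook to rewrite loads through SSA construction while keeping the original stores in place:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

// Hypothetical promoter: loads are replaced by SSA values as usual, but
// shouldDelete() vetoes the removal of stores after SSA construction.
class KeepStoresPromoter : public LoadAndStorePromoter {
public:
  KeepStoresPromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S)
      : LoadAndStorePromoter(Insts, S) {}

  bool shouldDelete(Instruction *I) const override {
    return !isa<StoreInst>(I);
  }
};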
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
new file mode 100644
index 000000000000..e1f681bbd367
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -0,0 +1,284 @@
+//===- Transforms/Utils/SampleProfileInference.h ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file provides the interface for the profile inference algorithm, profi.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
+#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+class BasicBlock;
+class Function;
+class MachineBasicBlock;
+class MachineFunction;
+
+namespace afdo_detail {
+
+template <class BlockT> struct TypeMap {};
+template <> struct TypeMap<BasicBlock> {
+ using BasicBlockT = BasicBlock;
+ using FunctionT = Function;
+};
+template <> struct TypeMap<MachineBasicBlock> {
+ using BasicBlockT = MachineBasicBlock;
+ using FunctionT = MachineFunction;
+};
+
+} // end namespace afdo_detail
+
+struct FlowJump;
+
+/// A wrapper of a binary basic block.
+struct FlowBlock {
+ uint64_t Index;
+ uint64_t Weight{0};
+ bool UnknownWeight{false};
+ uint64_t Flow{0};
+ bool HasSelfEdge{false};
+ std::vector<FlowJump *> SuccJumps;
+ std::vector<FlowJump *> PredJumps;
+
+ /// Check if it is the entry block in the function.
+ bool isEntry() const { return PredJumps.empty(); }
+
+ /// Check if it is an exit block in the function.
+ bool isExit() const { return SuccJumps.empty(); }
+};
+
+/// A wrapper of a jump between two basic blocks.
+struct FlowJump {
+ uint64_t Source;
+ uint64_t Target;
+ uint64_t Flow{0};
+ bool IsUnlikely{false};
+};
+
+/// A wrapper of a binary function with basic blocks and jumps.
+struct FlowFunction {
+ std::vector<FlowBlock> Blocks;
+ std::vector<FlowJump> Jumps;
+ /// The index of the entry block.
+ uint64_t Entry;
+};
+
+void applyFlowInference(FlowFunction &Func);
+
+/// Sample profile inference pass.
+template <typename BT> class SampleProfileInference {
+public:
+ using BasicBlockT = typename afdo_detail::TypeMap<BT>::BasicBlockT;
+ using FunctionT = typename afdo_detail::TypeMap<BT>::FunctionT;
+ using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
+ using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
+ using EdgeWeightMap = DenseMap<Edge, uint64_t>;
+ using BlockEdgeMap =
+ DenseMap<const BasicBlockT *, SmallVector<const BasicBlockT *, 8>>;
+
+ SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors,
+ BlockWeightMap &SampleBlockWeights)
+ : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {}
+
+ /// Apply the profile inference algorithm for a given function
+ void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
+
+private:
+ /// Try to infer branch probabilities, mimicking the implementation of
+ /// BranchProbabilityInfo. Unlikely-taken branches are marked so that the
+ /// inference algorithm can avoid sending flow along the corresponding edges.
+ void findUnlikelyJumps(const std::vector<const BasicBlockT *> &BasicBlocks,
+ BlockEdgeMap &Successors, FlowFunction &Func);
+
+ /// Determine whether the block is an exit in the CFG.
+ bool isExit(const BasicBlockT *BB);
+
+ /// Function.
+ const FunctionT &F;
+
+ /// Successors for each basic block in the CFG.
+ BlockEdgeMap &Successors;
+
+ /// Map basic blocks to their sampled weights.
+ BlockWeightMap &SampleBlockWeights;
+};
+
+template <typename BT>
+void SampleProfileInference<BT>::apply(BlockWeightMap &BlockWeights,
+ EdgeWeightMap &EdgeWeights) {
+ // Find all forward-reachable blocks, to which the inference algorithm
+ // will be applied.
+ df_iterator_default_set<const BasicBlockT *> Reachable;
+ for (auto *BB : depth_first_ext(&F, Reachable))
+ (void)BB /* Mark all reachable blocks */;
+
+ // Find all backward-reachable blocks, to which the inference algorithm
+ // will be applied.
+ df_iterator_default_set<const BasicBlockT *> InverseReachable;
+ for (const auto &BB : F) {
+ // An exit block is a block without any successors.
+ if (isExit(&BB)) {
+ for (auto *RBB : inverse_depth_first_ext(&BB, InverseReachable))
+ (void)RBB;
+ }
+ }
+
+ // Keep a stable order for reachable blocks
+ DenseMap<const BasicBlockT *, uint64_t> BlockIndex;
+ std::vector<const BasicBlockT *> BasicBlocks;
+ BlockIndex.reserve(Reachable.size());
+ BasicBlocks.reserve(Reachable.size());
+ for (const auto &BB : F) {
+ if (Reachable.count(&BB) && InverseReachable.count(&BB)) {
+ BlockIndex[&BB] = BasicBlocks.size();
+ BasicBlocks.push_back(&BB);
+ }
+ }
+
+ BlockWeights.clear();
+ EdgeWeights.clear();
+ bool HasSamples = false;
+ for (const auto *BB : BasicBlocks) {
+ auto It = SampleBlockWeights.find(BB);
+ if (It != SampleBlockWeights.end() && It->second > 0) {
+ HasSamples = true;
+ BlockWeights[BB] = It->second;
+ }
+ }
+ // Quit early for functions with a single block or ones w/o samples
+ if (BasicBlocks.size() <= 1 || !HasSamples) {
+ return;
+ }
+
+ // Create necessary objects
+ FlowFunction Func;
+ Func.Blocks.reserve(BasicBlocks.size());
+ // Create FlowBlocks
+ for (const auto *BB : BasicBlocks) {
+ FlowBlock Block;
+ if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) {
+ Block.UnknownWeight = false;
+ Block.Weight = SampleBlockWeights[BB];
+ } else {
+ Block.UnknownWeight = true;
+ Block.Weight = 0;
+ }
+ Block.Index = Func.Blocks.size();
+ Func.Blocks.push_back(Block);
+ }
+ // Create FlowEdges
+ for (const auto *BB : BasicBlocks) {
+ for (auto *Succ : Successors[BB]) {
+ if (!BlockIndex.count(Succ))
+ continue;
+ FlowJump Jump;
+ Jump.Source = BlockIndex[BB];
+ Jump.Target = BlockIndex[Succ];
+ Func.Jumps.push_back(Jump);
+ if (BB == Succ) {
+ Func.Blocks[BlockIndex[BB]].HasSelfEdge = true;
+ }
+ }
+ }
+ for (auto &Jump : Func.Jumps) {
+ Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
+ Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
+ }
+
+ // Try to infer probabilities of jumps based on the contents of basic blocks.
+ findUnlikelyJumps(BasicBlocks, Successors, Func);
+
+ // Find the entry block
+ for (size_t I = 0; I < Func.Blocks.size(); I++) {
+ if (Func.Blocks[I].isEntry()) {
+ Func.Entry = I;
+ break;
+ }
+ }
+
+ // Create and apply the inference network model.
+ applyFlowInference(Func);
+
+ // Extract the resulting weights from the control flow
+ // All weights are increased by one to avoid propagation errors introduced by
+ // zero weights.
+ for (const auto *BB : BasicBlocks) {
+ BlockWeights[BB] = Func.Blocks[BlockIndex[BB]].Flow;
+ }
+ for (auto &Jump : Func.Jumps) {
+ Edge E = std::make_pair(BasicBlocks[Jump.Source], BasicBlocks[Jump.Target]);
+ EdgeWeights[E] = Jump.Flow;
+ }
+
+#ifndef NDEBUG
+ // Unreachable blocks and edges should not have a weight.
+ for (auto &I : BlockWeights) {
+ assert(Reachable.contains(I.first));
+ assert(InverseReachable.contains(I.first));
+ }
+ for (auto &I : EdgeWeights) {
+ assert(Reachable.contains(I.first.first) &&
+ Reachable.contains(I.first.second));
+ assert(InverseReachable.contains(I.first.first) &&
+ InverseReachable.contains(I.first.second));
+ }
+#endif
+}
+
+template <typename BT>
+inline void SampleProfileInference<BT>::findUnlikelyJumps(
+ const std::vector<const BasicBlockT *> &BasicBlocks,
+ BlockEdgeMap &Successors, FlowFunction &Func) {}
+
+template <>
+inline void SampleProfileInference<BasicBlock>::findUnlikelyJumps(
+ const std::vector<const BasicBlockT *> &BasicBlocks,
+ BlockEdgeMap &Successors, FlowFunction &Func) {
+ for (auto &Jump : Func.Jumps) {
+ const auto *BB = BasicBlocks[Jump.Source];
+ const auto *Succ = BasicBlocks[Jump.Target];
+ const Instruction *TI = BB->getTerminator();
+ // Check if the block ends with an InvokeInst and mark the non-taken branch
+ // unlikely; in that case, block Succ should be a landing pad.
+ if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) {
+ if (isa<InvokeInst>(TI)) {
+ Jump.IsUnlikely = true;
+ }
+ }
+ const Instruction *SuccTI = Succ->getTerminator();
+ // Check if the target block ends with an UnreachableInst and mark it unlikely.
+ if (SuccTI->getNumSuccessors() == 0) {
+ if (isa<UnreachableInst>(SuccTI)) {
+ Jump.IsUnlikely = true;
+ }
+ }
+ }
+}
+
+template <typename BT>
+inline bool SampleProfileInference<BT>::isExit(const BasicBlockT *BB) {
+ return BB->succ_empty();
+}
+
+template <>
+inline bool SampleProfileInference<BasicBlock>::isExit(const BasicBlock *BB) {
+ return succ_empty(BB);
+}
+
+} // end namespace llvm
+#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
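
As an editorial aside, the FlowFunction interface above can be exercised standalone; a minimal sketch (block indices and the sampled weight are invented for illustration) that builds a two-way diamond and lets the inference produce a consistent flow:

#include "llvm/Transforms/Utils/SampleProfileInference.h"

using namespace llvm;

// Diamond CFG: 0 -> {1, 2} -> 3. Only block 1 carries a sampled weight;
// applyFlowInference fills in block and jump flows consistent with it.
static void runTinyInference() {
  FlowFunction Func;
  Func.Blocks.resize(4);
  for (uint64_t I = 0; I < 4; I++) {
    Func.Blocks[I].Index = I;
    Func.Blocks[I].UnknownWeight = true; // no samples by default
  }
  Func.Blocks[1].Weight = 100;
  Func.Blocks[1].UnknownWeight = false;

  const uint64_t Edges[4][2] = {{0, 1}, {0, 2}, {1, 3}, {2, 3}};
  for (const auto &E : Edges) {
    FlowJump Jump;
    Jump.Source = E[0];
    Jump.Target = E[1];
    Func.Jumps.push_back(Jump);
  }
  // Wire jump pointers only after all push_backs so they remain valid.
  for (auto &Jump : Func.Jumps) {
    Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
    Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
  }
  Func.Entry = 0;

  applyFlowInference(Func);
  // Func.Blocks[I].Flow and Func.Jumps[J].Flow now hold inferred counts.
}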
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 6a2f0acf46f3..175bdde7fd05 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -38,6 +38,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
namespace llvm {
@@ -74,6 +75,8 @@ template <> struct IRTraits<BasicBlock> {
} // end namespace afdo_detail
+extern cl::opt<bool> SampleProfileUseProfi;
+
template <typename BT> class SampleProfileLoaderBaseImpl {
public:
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
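
Since SampleProfileUseProfi is an ordinary cl::opt (its string name and default live with its definition in lib/, not in this header), it can also be toggled programmatically, e.g. from unit-test setup; a sketch:

#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"

// Illustrative only: cl::opt supports direct value assignment, so a test
// can force the flow-based inference on before running the loader.
static void enableProfiForTest() { llvm::SampleProfileUseProfi = true; }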
@@ -142,6 +145,9 @@ protected:
ArrayRef<BasicBlockT *> Descendants,
PostDominatorTreeT *DomTree);
void propagateWeights(FunctionT &F);
+ void applyProfi(FunctionT &F, BlockEdgeMap &Successors,
+ BlockWeightMap &SampleBlockWeights,
+ BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(FunctionT &F);
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
@@ -150,6 +156,11 @@ protected:
bool
computeAndPropagateWeights(FunctionT &F,
const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ void initWeightPropagation(FunctionT &F,
+ const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ void
+ finalizeWeightPropagation(FunctionT &F,
+ const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
void emitCoverageRemarks(FunctionT &F);
/// Map basic blocks to their computed weights.
@@ -741,50 +752,65 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
/// known).
template <typename BT>
void SampleProfileLoaderBaseImpl<BT>::propagateWeights(FunctionT &F) {
- bool Changed = true;
- unsigned I = 0;
-
- // If BB weight is larger than its corresponding loop's header BB weight,
- // use the BB weight to replace the loop header BB weight.
- for (auto &BI : F) {
- BasicBlockT *BB = &BI;
- LoopT *L = LI->getLoopFor(BB);
- if (!L) {
- continue;
+ // Flow-based profile inference is only usable with the BasicBlock
+ // instantiation of SampleProfileLoaderBaseImpl.
+ if (SampleProfileUseProfi) {
+ // Prepare block sample counts for inference.
+ BlockWeightMap SampleBlockWeights;
+ for (const auto &BI : F) {
+ ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
+ if (Weight)
+ SampleBlockWeights[&BI] = Weight.get();
}
- BasicBlockT *Header = L->getHeader();
- if (Header && BlockWeights[BB] > BlockWeights[Header]) {
- BlockWeights[Header] = BlockWeights[BB];
+ // Fill in BlockWeights and EdgeWeights using an inference algorithm.
+ applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
+ } else {
+ bool Changed = true;
+ unsigned I = 0;
+
+ // If BB weight is larger than its corresponding loop's header BB weight,
+ // use the BB weight to replace the loop header BB weight.
+ for (auto &BI : F) {
+ BasicBlockT *BB = &BI;
+ LoopT *L = LI->getLoopFor(BB);
+ if (!L) {
+ continue;
+ }
+ BasicBlockT *Header = L->getHeader();
+ if (Header && BlockWeights[BB] > BlockWeights[Header]) {
+ BlockWeights[Header] = BlockWeights[BB];
+ }
}
- }
- // Before propagation starts, build, for each block, a list of
- // unique predecessors and successors. This is necessary to handle
- // identical edges in multiway branches. Since we visit all blocks and all
- // edges of the CFG, it is cleaner to build these lists once at the start
- // of the pass.
- buildEdges(F);
+ // Propagate until we converge or we go past the iteration limit.
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, false);
+ }
- // Propagate until we converge or we go past the iteration limit.
- while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F, false);
- }
+ // The first propagation propagates BB counts from annotated BBs to unknown
+ // BBs. The 2nd propagation pass resets edge weights, and uses all BB
+ // weights to propagate edge weights.
+ VisitedEdges.clear();
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, false);
+ }
- // The first propagation propagates BB counts from annotated BBs to unknown
- // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
- // to propagate edge weights.
- VisitedEdges.clear();
- Changed = true;
- while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F, false);
+ // The 3rd propagation pass allows adjusting annotated BB weights that are
+ // obviously wrong.
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, true);
+ }
}
+}
- // The 3rd propagation pass allows adjust annotated BB weights that are
- // obviously wrong.
- Changed = true;
- while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F, true);
- }
+template <typename BT>
+void SampleProfileLoaderBaseImpl<BT>::applyProfi(
+ FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
+ BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
+ auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
+ Infer.apply(BlockWeights, EdgeWeights);
}
/// Generate branch weight metadata for all branches in \p F.
@@ -842,26 +868,64 @@ bool SampleProfileLoaderBaseImpl<BT>::computeAndPropagateWeights(
Changed |= computeBlockWeights(F);
if (Changed) {
- // Add an entry count to the function using the samples gathered at the
- // function entry.
- // Sets the GUIDs that are inlined in the profiled binary. This is used
- // for ThinLink to make correct liveness analysis, and also make the IR
- // match the profiled binary before annotation.
- getFunction(F).setEntryCount(
- ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
- &InlinedGUIDs);
+ // Initialize propagation.
+ initWeightPropagation(F, InlinedGUIDs);
+ // Propagate weights to all edges.
+ propagateWeights(F);
+
+ // Post-process propagated weights.
+ finalizeWeightPropagation(F, InlinedGUIDs);
+ }
+
+ return Changed;
+}
+
+template <typename BT>
+void SampleProfileLoaderBaseImpl<BT>::initWeightPropagation(
+ FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
+ // Add an entry count to the function using the samples gathered at the
+ // function entry.
+ // Sets the GUIDs that are inlined in the profiled binary. This is used
+ // for ThinLink to make correct liveness analysis, and also make the IR
+ // match the profiled binary before annotation.
+ getFunction(F).setEntryCount(
+ ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
+ &InlinedGUIDs);
+
+ if (!SampleProfileUseProfi) {
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
// Find equivalence classes.
findEquivalenceClasses(F);
-
- // Propagate weights to all edges.
- propagateWeights(F);
}
- return Changed;
+ // Before propagation starts, build, for each block, a list of
+ // unique predecessors and successors. This is necessary to handle
+ // identical edges in multiway branches. Since we visit all blocks and all
+ // edges of the CFG, it is cleaner to build these lists once at the start
+ // of the pass.
+ buildEdges(F);
+}
+
+template <typename BT>
+void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation(
+ FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
+ // If we utilize a flow-based count inference, then we trust the computed
+ // counts and set the entry count as computed by the algorithm. This is
+ // primarily done to sync the counts produced by profi and BFI inference,
+ // which uses the entry count for mass propagation.
+ // If profi produces a zero value for the entry count, we fall back to
+ // Samples->getHeadSamples() + 1 to avoid functions with zero count.
+ if (SampleProfileUseProfi) {
+ const BasicBlockT *EntryBB = getEntryBB(&F);
+ if (BlockWeights[EntryBB] > 0) {
+ getFunction(F).setEntryCount(
+ ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
+ &InlinedGUIDs);
+ }
+ }
}
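
To make the fallback concrete, a worked example with hypothetical numbers: if Samples->getHeadSamples() is 0, initWeightPropagation sets a provisional entry count of 0 + 1 = 1; when profi later infers an entry-block flow of, say, 120, finalizeWeightPropagation overrides the entry count with 120, whereas an inferred flow of 0 leaves the provisional count of 1 in place.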
template <typename BT>