diff options
Diffstat (limited to 'llvm/include')
54 files changed, 1597 insertions, 422 deletions
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index d170eff17951..f2183ff52bfb 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -2377,10 +2377,21 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit); * * @{ */ + +/** Deprecated: Use LLVMAddAlias2 instead. */ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, const char *Name); /** + * Add a GlobalAlias with the given value type, address space and aliasee. + * + * @see llvm::GlobalAlias::create() + */ +LLVMValueRef LLVMAddAlias2(LLVMModuleRef M, LLVMTypeRef ValueTy, + unsigned AddrSpace, LLVMValueRef Aliasee, + const char *Name); + +/** * Obtain a GlobalAlias value from a Module by its name. * * The returned value corresponds to a llvm::GlobalAlias value. diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 595cd94b6b8f..c2660502a419 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -1458,10 +1458,8 @@ public: /// uint64_t. The bitwidth must be <= 64 or the value must fit within a /// uint64_t. Otherwise an assertion will result. 
uint64_t getZExtValue() const { - if (isSingleWord()) { - assert(BitWidth && "zero width values not allowed"); + if (isSingleWord()) return U.VAL; - } assert(getActiveBits() <= 64 && "Too many bits for uint64_t"); return U.pVal[0]; } diff --git a/llvm/include/llvm/ADT/SCCIterator.h b/llvm/include/llvm/ADT/SCCIterator.h index 8a7c0a78a0fc..ad35e09f0f74 100644 --- a/llvm/include/llvm/ADT/SCCIterator.h +++ b/llvm/include/llvm/ADT/SCCIterator.h @@ -28,6 +28,10 @@ #include <cassert> #include <cstddef> #include <iterator> +#include <queue> +#include <set> +#include <unordered_map> +#include <unordered_set> #include <vector> namespace llvm { @@ -234,6 +238,135 @@ template <class T> scc_iterator<T> scc_end(const T &G) { return scc_iterator<T>::end(G); } +/// Sort the nodes of a directed SCC in the decreasing order of the edge +/// weights. The instantiating GraphT type should have weighted edge type +/// declared in its graph traits in order to use this iterator. +/// +/// This is implemented using Kruskal's minimal spanning tree algorithm followed +/// by a BFS walk. First a maximum spanning tree (forest) is built based on all +/// edges within the SCC collection. Then a BFS walk is initiated on tree nodes +/// that do not have a predecessor. Finally, the BFS order computed is the +/// traversal order of the nodes of the SCC. Such order ensures that +/// high-weighted edges are visited first during the traversal. +template <class GraphT, class GT = GraphTraits<GraphT>> +class scc_member_iterator { + using NodeType = typename GT::NodeType; + using EdgeType = typename GT::EdgeType; + using NodesType = std::vector<NodeType *>; + + // Auxiliary node information used during the MST calculation. + struct NodeInfo { + NodeInfo *Group = this; + uint32_t Rank = 0; + bool Visited = true; + }; + + // Find the root group of the node and compress the path from node to the + // root. 
+ NodeInfo *find(NodeInfo *Node) { + if (Node->Group != Node) + Node->Group = find(Node->Group); + return Node->Group; + } + + // Union the source and target node into the same group and return true. + // Returns false if they are already in the same group. + bool unionGroups(const EdgeType *Edge) { + NodeInfo *G1 = find(&NodeInfoMap[Edge->Source]); + NodeInfo *G2 = find(&NodeInfoMap[Edge->Target]); + + // If the edge forms a cycle, do not add it to MST + if (G1 == G2) + return false; + + // Make the smaller rank tree a direct child of the root of the high rank tree. + if (G1->Rank < G2->Rank) + G1->Group = G2; + else { + G2->Group = G1; + // If the ranks are the same, increment root of one tree by one. + if (G1->Rank == G2->Rank) + G2->Rank++; + } + return true; + } + + std::unordered_map<NodeType *, NodeInfo> NodeInfoMap; + NodesType Nodes; + +public: + scc_member_iterator(const NodesType &InputNodes); + + NodesType &operator*() { return Nodes; } +}; + +template <class GraphT, class GT> +scc_member_iterator<GraphT, GT>::scc_member_iterator( + const NodesType &InputNodes) { + if (InputNodes.size() <= 1) { + Nodes = InputNodes; + return; + } + + // Initialize auxiliary node information. + NodeInfoMap.clear(); + for (auto *Node : InputNodes) { + // This is specifically used to construct a `NodeInfo` object in place. An + // insert operation will involve a copy construction which invalidates the + // initial value of the `Group` field which should be `this`. + (void)NodeInfoMap[Node].Group; + } + + // Sort edges by weights. + struct EdgeComparer { + bool operator()(const EdgeType *L, const EdgeType *R) const { + return L->Weight > R->Weight; + } + }; + + std::multiset<const EdgeType *, EdgeComparer> SortedEdges; + for (auto *Node : InputNodes) { + for (auto &Edge : Node->Edges) { + if (NodeInfoMap.count(Edge.Target)) + SortedEdges.insert(&Edge); + } + } + + // Traverse all the edges and compute the Maximum Weight Spanning Tree + // using Kruskal's algorithm. 
+ std::unordered_set<const EdgeType *> MSTEdges; + for (auto *Edge : SortedEdges) { + if (unionGroups(Edge)) + MSTEdges.insert(Edge); + } + + // Do BFS on MST, starting from nodes that have no incoming edge. These nodes + // are "roots" of the MST forest. This ensures that nodes are visited before + // their descendants are, thus ensuring hot edges are processed before cold + // edges, based on how MST is computed. + for (const auto *Edge : MSTEdges) + NodeInfoMap[Edge->Target].Visited = false; + + std::queue<NodeType *> Queue; + for (auto &Node : NodeInfoMap) + if (Node.second.Visited) + Queue.push(Node.first); + + while (!Queue.empty()) { + auto *Node = Queue.front(); + Queue.pop(); + Nodes.push_back(Node); + for (auto &Edge : Node->Edges) { + if (MSTEdges.count(&Edge) && !NodeInfoMap[Edge.Target].Visited) { + NodeInfoMap[Edge.Target].Visited = true; + Queue.push(Edge.Target); + } + } + } + + assert(InputNodes.size() == Nodes.size() && "missing nodes in MST"); + std::reverse(Nodes.begin(), Nodes.end()); +} } // end namespace llvm #endif // LLVM_ADT_SCCITERATOR_H diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 48f15b02283a..f9b658ca960a 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1016,20 +1016,39 @@ public: private: std::tuple<RangeTs...> Ranges; - template <size_t... Ns> iterator begin_impl(std::index_sequence<Ns...>) { + template <size_t... Ns> + iterator begin_impl(std::index_sequence<Ns...>) { + return iterator(std::get<Ns>(Ranges)...); + } + template <size_t... Ns> + iterator begin_impl(std::index_sequence<Ns...>) const { + return iterator(std::get<Ns>(Ranges)...); + } template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) { return iterator(make_range(std::end(std::get<Ns>(Ranges)), std::end(std::get<Ns>(Ranges)))...); } + template <size_t... 
Ns> iterator end_impl(std::index_sequence<Ns...>) const { + return iterator(make_range(std::end(std::get<Ns>(Ranges)), + std::end(std::get<Ns>(Ranges)))...); + } public: concat_range(RangeTs &&... Ranges) : Ranges(std::forward<RangeTs>(Ranges)...) {} - iterator begin() { return begin_impl(std::index_sequence_for<RangeTs...>{}); } - iterator end() { return end_impl(std::index_sequence_for<RangeTs...>{}); } + iterator begin() { + return begin_impl(std::index_sequence_for<RangeTs...>{}); + } + iterator begin() const { + return begin_impl(std::index_sequence_for<RangeTs...>{}); + } + iterator end() { + return end_impl(std::index_sequence_for<RangeTs...>{}); + } + iterator end() const { + return end_impl(std::index_sequence_for<RangeTs...>{}); + } }; } // end namespace detail @@ -1977,10 +1996,16 @@ public: enumerator_iter<R> begin() { return enumerator_iter<R>(0, std::begin(TheRange)); } + enumerator_iter<R> begin() const { + return enumerator_iter<R>(0, std::begin(TheRange)); + } enumerator_iter<R> end() { return enumerator_iter<R>(std::end(TheRange)); } + enumerator_iter<R> end() const { + return enumerator_iter<R>(std::end(TheRange)); + } private: R TheRange; diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index c26dbc457949..ea4c0312e073 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -20,6 +20,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" @@ -50,6 +51,7 @@ enum class RecurKind { FMul, ///< Product of floats. FMin, ///< FP min implemented in terms of select(cmp()). FMax, ///< FP max implemented in terms of select(cmp()). + FMulAdd, ///< Fused multiply-add of floats (a * b + c). 
SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop ///< invariant SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop @@ -260,6 +262,12 @@ public: SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi, Loop *L) const; + /// Returns true if the instruction is a call to the llvm.fmuladd intrinsic. + static bool isFMulAddIntrinsic(Instruction *I) { + return isa<IntrinsicInst>(I) && + cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd; + } + private: // The starting value of the recurrence. // It does not have to be zero! diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/Analysis/Lint.h index 6eb637e72782..4ceae2d29f16 100644 --- a/llvm/include/llvm/Analysis/Lint.h +++ b/llvm/include/llvm/Analysis/Lint.h @@ -6,11 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file defines lint interfaces that can be used for some sanity checking -// of input to the system, and for checking that transformations -// haven't done something bad. In contrast to the Verifier, the Lint checker -// checks for undefined behavior or constructions with likely unintended -// behavior. +// This file defines lint interfaces that can be used for some validation of +// input to the system, and for checking that transformations haven't done +// something bad. In contrast to the Verifier, the Lint checker checks for +// undefined behavior or constructions with likely unintended behavior. // // To see what specifically is checked, look at Lint.cpp // diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index a2260688e3d6..df50611832ce 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1378,6 +1378,8 @@ private: /// includes an exact count and a maximum count. 
/// class BackedgeTakenInfo { + friend class ScalarEvolution; + /// A list of computable exits and their not-taken counts. Loops almost /// never have more than one computable exit. SmallVector<ExitNotTakenInfo, 1> ExitNotTaken; @@ -1398,9 +1400,6 @@ private: /// True iff the backedge is taken either exactly Max or zero times. bool MaxOrZero = false; - /// SCEV expressions used in any of the ExitNotTakenInfo counts. - SmallPtrSet<const SCEV *, 4> Operands; - bool isComplete() const { return IsComplete; } const SCEV *getConstantMax() const { return ConstantMax; } @@ -1466,10 +1465,6 @@ private: /// Return true if the number of times this backedge is taken is either the /// value returned by getConstantMax or zero. bool isConstantMaxOrZero(ScalarEvolution *SE) const; - - /// Return true if any backedge taken count expressions refer to the given - /// subexpression. - bool hasOperand(const SCEV *S) const; }; /// Cache the backedge-taken count of the loops for this function as they @@ -1480,6 +1475,10 @@ private: /// function as they are computed. DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts; + /// Loops whose backedge taken counts directly use this non-constant SCEV. + DenseMap<const SCEV *, SmallPtrSet<PointerIntPair<const Loop *, 1, bool>, 4>> + BECountUsers; + /// This map contains entries for all of the PHI instructions that we /// attempt to compute constant evolutions for. This allows us to avoid /// potentially expensive recomputation of these properties. An instruction @@ -1492,6 +1491,11 @@ private: DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>> ValuesAtScopes; + /// Reverse map for invalidation purposes: Stores of which SCEV and which + /// loop this is the value-at-scope of. + DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>> + ValuesAtScopesUsers; + /// Memoized computeLoopDisposition results. 
DenseMap<const SCEV *, SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>> @@ -1616,11 +1620,6 @@ private: /// SCEV+Loop pair. const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L); - /// This looks up computed SCEV values for all instructions that depend on - /// the given instruction and removes them from the ValueExprMap map if they - /// reference SymName. This is used during PHI resolution. - void forgetSymbolicName(Instruction *I, const SCEV *SymName); - /// Return the BackedgeTakenInfo for the given loop, lazily computing new /// values if the loop hasn't been analyzed yet. The returned result is /// guaranteed not to be predicated. @@ -1911,6 +1910,9 @@ private: bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, SCEV::NoWrapFlags &Flags); + /// Forget predicated/non-predicated backedge taken counts for the given loop. + void forgetBackedgeTakenCounts(const Loop *L, bool Predicated); + /// Drop memoized information for all \p SCEVs. void forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs); @@ -1923,6 +1925,9 @@ private: /// Erase Value from ValueExprMap and ExprValueMap. void eraseValueFromMap(Value *V); + /// Insert V to S mapping into ValueExprMap and ExprValueMap. + void insertValueToMap(Value *V, const SCEV *S); + /// Return false iff given SCEV contains a SCEVUnknown with NULL value- /// pointer. 
bool checkValidity(const SCEV *S) const; diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index ded53617b304..9c1abef33b28 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1048,6 +1048,12 @@ TLI_DEFINE_STRING_INTERNAL("memset") /// void memset_pattern16(void *b, const void *pattern16, size_t len); TLI_DEFINE_ENUM_INTERNAL(memset_pattern16) TLI_DEFINE_STRING_INTERNAL("memset_pattern16") +/// void memset_pattern4(void *b, const void *pattern4, size_t len); +TLI_DEFINE_ENUM_INTERNAL(memset_pattern4) +TLI_DEFINE_STRING_INTERNAL("memset_pattern4") +/// void memset_pattern8(void *b, const void *pattern8, size_t len); +TLI_DEFINE_ENUM_INTERNAL(memset_pattern8) +TLI_DEFINE_STRING_INTERNAL("memset_pattern8") /// int mkdir(const char *path, mode_t mode); TLI_DEFINE_ENUM_INTERNAL(mkdir) TLI_DEFINE_STRING_INTERNAL("mkdir") diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 24e2318de48b..751c88a4ecbb 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -115,7 +115,7 @@ struct VFShape { return {EC, Parameters}; } - /// Sanity check on the Parameters in the VFShape. + /// Validation check on the Parameters in the VFShape. bool hasValidParameterList() const; }; diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index a270fd399aeb..c199e933116a 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1602,6 +1602,13 @@ enum { NT_FREEBSD_PROCSTAT_AUXV = 16, }; +// NetBSD core note types. +enum { + NT_NETBSDCORE_PROCINFO = 1, + NT_NETBSDCORE_AUXV = 2, + NT_NETBSDCORE_LWPSTATUS = 24, +}; + // OpenBSD core note types. 
enum { NT_OPENBSD_PROCINFO = 10, diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index ed3cd54df272..73d39fecc268 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -130,6 +130,7 @@ bool getEnableMachineFunctionSplitter(); bool getEnableDebugEntryValues(); bool getValueTrackingVariableLocations(); +Optional<bool> getExplicitValueTrackingVariableLocations(); bool getForceDwarfFrameSection(); @@ -170,6 +171,10 @@ void setFunctionAttributes(StringRef CPU, StringRef Features, Function &F); /// Set function attributes of functions in Module M based on CPU, /// Features, and command line flags. void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M); + +/// Should value-tracking variable locations / instruction referencing be +/// enabled by default for this triple? +bool getDefaultValueTrackingVariableLocations(const llvm::Triple &T); } // namespace codegen } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index ff4ad4b72636..f3fa652b0175 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -564,6 +564,7 @@ public: /// This variant does not erase \p MI after calling the build function. 
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo); bool matchFunnelShiftToRotate(MachineInstr &MI); void applyFunnelShiftToRotate(MachineInstr &MI); bool matchRotateOutOfRange(MachineInstr &MI); @@ -648,6 +649,54 @@ public: /// (fma fneg(x), fneg(y), z) -> (fma x, y, z) bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo); + bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, + bool &HasFMAD, bool &Aggressive, + bool CanReassociate = false); + + /// Transform (fadd (fmul x, y), z) -> (fma x, y, z) + /// (fadd (fmul x, y), z) -> (fmad x, y, z) + bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + /// (fadd (fpext (fmul x, y)), z) -> (fmad (fpext x), (fpext y), z) + bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) + /// (fadd (fmad x, y, (fmul u, v)), z) -> (fmad x, y, (fmad u, v, z)) + bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + + // Transform (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + // (fadd (fmad x, y, (fpext (fmul u, v))), z) + // -> (fmad x, y, (fmad (fpext u), (fpext v), z)) + bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// Transform (fsub (fmul x, y), z) -> (fma x, y, -z) + /// (fsub (fmul x, y), z) -> (fmad x, y, -z) + bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + /// (fsub (fneg (fmul, x, y)), z) -> (fmad (fneg x), y, (fneg z)) + bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// Transform (fsub (fpext (fmul x, y)), 
z) + /// -> (fma (fpext x), (fpext y), (fneg z)) + /// (fsub (fpext (fmul x, y)), z) + /// -> (fmad (fpext x), (fpext y), (fneg z)) + bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// Transform (fsub (fpext (fneg (fmul x, y))), z) + /// -> (fneg (fma (fpext x), (fpext y), z)) + /// (fsub (fpext (fneg (fmul x, y))), z) + /// -> (fneg (fmad (fpext x), (fpext y), z)) + bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, + BuildFnTy &MatchInfo); + private: /// Given a non-indexed load or store instruction \p MI, find an offset that /// can be usefully and legally folded into it as a post-indexing operation. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index e813d030eec3..a41166bb4c6b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -129,6 +129,43 @@ inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) { return SpecificConstantMatch(RequestedValue); } +/// Matcher for a specific constant splat. +struct SpecificConstantSplatMatch { + int64_t RequestedVal; + SpecificConstantSplatMatch(int64_t RequestedVal) + : RequestedVal(RequestedVal) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + return isBuildVectorConstantSplat(Reg, MRI, RequestedVal, + /* AllowUndef */ false); + } +}; + +/// Matches a constant splat of \p RequestedValue. +inline SpecificConstantSplatMatch m_SpecificICstSplat(int64_t RequestedValue) { + return SpecificConstantSplatMatch(RequestedValue); +} + +/// Matcher for a specific constant or constant splat. 
+struct SpecificConstantOrSplatMatch { + int64_t RequestedVal; + SpecificConstantOrSplatMatch(int64_t RequestedVal) + : RequestedVal(RequestedVal) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + int64_t MatchedVal; + if (mi_match(Reg, MRI, m_ICst(MatchedVal)) && MatchedVal == RequestedVal) + return true; + return isBuildVectorConstantSplat(Reg, MRI, RequestedVal, + /* AllowUndef */ false); + } +}; + +/// Matches a \p RequestedValue constant or a constant splat of \p +/// RequestedValue. +inline SpecificConstantOrSplatMatch +m_SpecificICstOrSplat(int64_t RequestedValue) { + return SpecificConstantOrSplatMatch(RequestedValue); +} + ///{ /// Convenience matchers for specific integer values. inline SpecificConstantMatch m_ZeroInt() { return SpecificConstantMatch(0); } @@ -489,6 +526,11 @@ inline UnaryOp_match<SrcTy, TargetOpcode::COPY> m_Copy(SrcTy &&Src) { return UnaryOp_match<SrcTy, TargetOpcode::COPY>(std::forward<SrcTy>(Src)); } +template <typename SrcTy> +inline UnaryOp_match<SrcTy, TargetOpcode::G_FSQRT> m_GFSqrt(const SrcTy &Src) { + return UnaryOp_match<SrcTy, TargetOpcode::G_FSQRT>(Src); +} + // General helper for generic MI compares, i.e. G_ICMP and G_FCMP // TODO: Allow checking a specific predicate. template <typename Pred_P, typename LHS_P, typename RHS_P, unsigned Opcode> diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 86545b976b8d..4126e2ac7b8f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -378,6 +378,18 @@ Optional<FPValueAndVReg> getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef = true); +/// Return true if the specified register is defined by G_BUILD_VECTOR or +/// G_BUILD_VECTOR_TRUNC where all of the elements are \p SplatValue or undef. 
+bool isBuildVectorConstantSplat(const Register Reg, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef); + +/// Return true if the specified instruction is a G_BUILD_VECTOR or +/// G_BUILD_VECTOR_TRUNC where all of the elements are \p SplatValue or undef. +bool isBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI, + int64_t SplatValue, bool AllowUndef); + /// Return true if the specified instruction is a G_BUILD_VECTOR or /// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef. bool isBuildVectorAllZeros(const MachineInstr &MI, diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index dcbd19ac6b5a..ec23dde0c6c0 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -938,7 +938,8 @@ public: int64_t Offset, LLT Ty); MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { - return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size)); + return getMachineMemOperand( + MMO, Offset, Size == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * Size)); } /// getMachineMemOperand - Allocate a new MachineMemOperand by copying diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index fa22ca6a98ac..a855a0797723 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -139,10 +139,13 @@ public: /// int getOffsetOfLocalArea() const { return LocalAreaOffset; } - /// isFPCloseToIncomingSP - Return true if the frame pointer is close to - /// the incoming stack pointer, false if it is close to the post-prologue - /// stack pointer. - virtual bool isFPCloseToIncomingSP() const { return true; } + /// Control the placement of special register scavenging spill slots when + /// allocating a stack frame. 
+ /// + /// If this returns true, the frame indexes used by the RegScavenger will be + /// allocated closest to the incoming stack pointer. + virtual bool allocateScavengingFrameIndexesNearIncomingSP( + const MachineFunction &MF) const; /// assignCalleeSavedSpillSlots - Allows target to override spill slot /// assignment logic. If implemented, assignCalleeSavedSpillSlots() should @@ -220,6 +223,9 @@ public: virtual void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {} + /// Does the stack probe function call return with a modified stack pointer? + virtual bool stackProbeFunctionModifiesSP() const { return false; } + /// Adjust the prologue to have the function use segmented stacks. This works /// by adding a check even before the "normal" function prologue. virtual void adjustForSegmentedStacks(MachineFunction &MF, diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 8bc730a3eda5..d43dd9fac85d 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1913,6 +1913,12 @@ public: "Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!"); } + /// Optional target hook to create the LLVM IR attributes for the outlined + /// function. If overridden, the overriding function must call the default + /// implementation. + virtual void mergeOutliningCandidateAttributes( + Function &F, std::vector<outliner::Candidate> &Candidates) const; + /// Returns how or if \p MI should be outlined. 
virtual outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const { diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 87f5168ec48f..d862701c37d7 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -425,6 +425,12 @@ public: return true; } + /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded + /// using generic code in SelectionDAGBuilder. + virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const { + return true; + } + /// Return true if it is profitable to convert a select of FP constants into /// a constant pool load whose address depends on the select condition. The /// parameter may be used to differentiate a select with FP compare from @@ -806,9 +812,12 @@ public: /// Return true if target always benefits from combining into FMA for a /// given value type. This must typically return false on targets where FMA /// takes more cycles to execute than FADD. - virtual bool enableAggressiveFMAFusion(EVT VT) const { - return false; - } + virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; } + + /// Return true if target always benefits from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. + virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; } /// Return the ValueType of the result of SETCC operations. virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, @@ -2710,6 +2719,14 @@ public: /// Return true if an fpext operation input to an \p Opcode operation is free /// (for instance, because half-precision floating-point numbers are /// implicitly extended to float-precision) for an FMA instruction. 
+ virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, + LLT DestTy, LLT SrcTy) const { + return false; + } + + /// Return true if an fpext operation input to an \p Opcode operation is free + /// (for instance, because half-precision floating-point numbers are + /// implicitly extended to float-precision) for an FMA instruction. virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT, EVT SrcVT) const { assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && @@ -2748,11 +2765,47 @@ public: return false; } + /// Return true if an FMA operation is faster than a pair of fmul and fadd + /// instructions. fmuladd intrinsics will be expanded to FMAs when this method + /// returns true, otherwise fmuladd is expanded to fmul + fadd. + /// + /// NOTE: This may be called before legalization on types for which FMAs are + /// not legal, but should return true if those types will eventually legalize + /// to types that support FMAs. After legalization, it will only be called on + /// types that support FMAs (via Legal or Custom actions) + virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + LLT) const { + return false; + } + /// IR version virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const { return false; } + /// Returns true if \p MI can be combined with another instruction to + /// form TargetOpcode::G_FMAD. \p N may be an TargetOpcode::G_FADD, + /// TargetOpcode::G_FSUB, or an TargetOpcode::G_FMUL which will be + /// distributed into an fadd/fsub. 
+ virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const { + assert((MI.getOpcode() == TargetOpcode::G_FADD || + MI.getOpcode() == TargetOpcode::G_FSUB || + MI.getOpcode() == TargetOpcode::G_FMUL) && + "unexpected node in FMAD forming combine"); + switch (Ty.getScalarSizeInBits()) { + case 16: + return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16); + case 32: + return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32); + case 64: + return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64); + default: + break; + } + + return false; + } + /// Returns true if be combined with to form an ISD::FMAD. \p N may be an /// ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed into an /// fadd/fsub. @@ -2852,6 +2905,12 @@ public: /// passed to the fp16 to fp conversion library function. virtual bool shouldKeepZExtForFP16Conv() const { return false; } + /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT + /// from min(max(fptoi)) saturation patterns. + virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const { + return isOperationLegalOrCustom(Op, VT); + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 902973ff5722..ae1afeb668be 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -156,6 +156,11 @@ public: NormalUnits.getNumInfoUnits()); } + const DWARFUnitVector &getNormalUnitsVector() { + parseNormalUnits(); + return NormalUnits; + } + /// Get units from .debug_types in this context. 
unit_iterator_range types_section_units() { parseNormalUnits(); diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index d471b80c7fe1..505686bfbf59 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -14,6 +14,7 @@ #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include <cstdint> #include <map> #include <set> @@ -153,8 +154,8 @@ private: /// \param SectionKind The object-file section kind that S comes from. /// /// \returns The number of errors that occurred during verification. - unsigned verifyUnitSection(const DWARFSection &S, - DWARFSectionKind SectionKind); + unsigned verifyUnitSection(const DWARFSection &S); + unsigned verifyUnits(const DWARFUnitVector &Units); /// Verifies that a call site entry is nested within a subprogram with a /// DW_AT_call attribute. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 362e8ab8e296..2180be3341e1 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -519,6 +519,7 @@ private: /// symbols of an error. class MaterializationResponsibility { friend class ExecutionSession; + friend class JITDylib; public: MaterializationResponsibility(MaterializationResponsibility &&) = delete; @@ -535,10 +536,10 @@ public: /// Returns the target JITDylib that these symbols are being materialized /// into. - JITDylib &getTargetJITDylib() const { return *JD; } + JITDylib &getTargetJITDylib() const { return JD; } /// Returns the ExecutionSession for this instance. - ExecutionSession &getExecutionSession(); + ExecutionSession &getExecutionSession() const; /// Returns the symbol flags map for this responsibility instance. 
/// Note: The returned flags may have transient flags (Lazy, Materializing) @@ -640,15 +641,16 @@ public: private: /// Create a MaterializationResponsibility for the given JITDylib and /// initial symbols. - MaterializationResponsibility(JITDylibSP JD, SymbolFlagsMap SymbolFlags, + MaterializationResponsibility(ResourceTrackerSP RT, + SymbolFlagsMap SymbolFlags, SymbolStringPtr InitSymbol) - : JD(std::move(JD)), SymbolFlags(std::move(SymbolFlags)), - InitSymbol(std::move(InitSymbol)) { - assert(this->JD && "Cannot initialize with null JITDylib"); + : JD(RT->getJITDylib()), RT(std::move(RT)), + SymbolFlags(std::move(SymbolFlags)), InitSymbol(std::move(InitSymbol)) { assert(!this->SymbolFlags.empty() && "Materializing nothing?"); } - JITDylibSP JD; + JITDylib &JD; + ResourceTrackerSP RT; SymbolFlagsMap SymbolFlags; SymbolStringPtr InitSymbol; }; @@ -913,12 +915,26 @@ public: const SymbolLookupSet &LookupSet) = 0; }; -/// A symbol table that supports asynchoronous symbol queries. +/// Represents a JIT'd dynamic library. +/// +/// This class aims to mimic the behavior of a regular dylib or shared object, +/// but without requiring the contained program representations to be compiled +/// up-front. The JITDylib's content is defined by adding MaterializationUnits, +/// and contained MaterializationUnits will typically rely on the JITDylib's +/// links-against order to resolve external references (similar to a regular +/// dylib). +/// +/// The JITDylib object is a thin wrapper that references state held by the +/// ExecutionSession. JITDylibs can be removed, clearing this underlying state +/// and leaving the JITDylib object in a defunct state. In this state the +/// JITDylib's name is guaranteed to remain accessible. If the ExecutionSession +/// is still alive then other operations are callable but will return an Error +/// or null result (depending on the API). 
It is illegal to call any operation +/// other than getName on a JITDylib after the ExecutionSession has been torn +/// down. /// -/// Represents a virtual shared object. Instances can not be copied or moved, so -/// their addresses may be used as keys for resource management. -/// JITDylib state changes must be made via an ExecutionSession to guarantee -/// that they are synchronized with respect to other JITDylib operations. +/// JITDylibs cannot be moved or copied. Their address is stable, and useful as +/// a key in some JIT data structures. class JITDylib : public ThreadSafeRefCountedBase<JITDylib>, public jitlink::JITLinkDylib { friend class AsynchronousSymbolQuery; @@ -931,10 +947,21 @@ public: JITDylib &operator=(const JITDylib &) = delete; JITDylib(JITDylib &&) = delete; JITDylib &operator=(JITDylib &&) = delete; + ~JITDylib(); /// Get a reference to the ExecutionSession for this JITDylib. + /// + /// It is legal to call this method on a defunct JITDylib, however the result + /// will only be usable if the ExecutionSession is still alive. If this JITDylib + /// is held by an error that may have torn down the JIT then the result + /// should not be used. ExecutionSession &getExecutionSession() const { return ES; } + /// Dump current JITDylib state to OS. + /// + /// It is legal to call this method on a defunct JITDylib. + void dump(raw_ostream &OS); + /// Calls remove on all trackers currently associated with this JITDylib. /// Does not run static deinits. /// @@ -942,12 +969,21 @@ public: /// added concurrently while the clear is underway, and the newly added /// code will *not* be cleared. Adding new code concurrently with a clear /// is usually a bug and should be avoided. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. Error clear(); /// Get the default resource tracker for this JITDylib. 
+ /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. ResourceTrackerSP getDefaultResourceTracker(); /// Create a resource tracker for this JITDylib. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. ResourceTrackerSP createResourceTracker(); /// Adds a definition generator to this JITDylib and returns a reference to @@ -956,6 +992,9 @@ public: /// When JITDylibs are searched during lookup, if no existing definition of /// a symbol is found, then any generators that have been added are run (in /// the order that they were added) to potentially generate a definition. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. template <typename GeneratorT> GeneratorT &addGenerator(std::unique_ptr<GeneratorT> DefGenerator); @@ -963,6 +1002,9 @@ public: /// /// The given generator must exist in this JITDylib's generators list (i.e. /// have been added and not yet removed). + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void removeGenerator(DefinitionGenerator &G); /// Set the link order to be used when fixing up definitions in JITDylib. @@ -983,26 +1025,41 @@ public: /// as the first in the link order (instead of this dylib) ensures that /// definitions within this dylib resolve to the lazy-compiling stubs, /// rather than immediately materializing the definitions in this dylib. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void setLinkOrder(JITDylibSearchOrder NewSearchOrder, bool LinkAgainstThisJITDylibFirst = true); /// Add the given JITDylib to the link order for definitions in this /// JITDylib. 
+ /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void addToLinkOrder(JITDylib &JD, JITDylibLookupFlags JDLookupFlags = JITDylibLookupFlags::MatchExportedSymbolsOnly); /// Replace OldJD with NewJD in the link order if OldJD is present. /// Otherwise this operation is a no-op. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD, JITDylibLookupFlags JDLookupFlags = JITDylibLookupFlags::MatchExportedSymbolsOnly); /// Remove the given JITDylib from the link order for this JITDylib if it is /// present. Otherwise this operation is a no-op. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. void removeFromLinkOrder(JITDylib &JD); /// Do something with the link order (run under the session lock). + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. template <typename Func> auto withLinkOrderDo(Func &&F) -> decltype(F(std::declval<const JITDylibSearchOrder &>())); @@ -1014,6 +1071,9 @@ public: /// /// This overload always takes ownership of the MaterializationUnit. If any /// errors occur, the MaterializationUnit consumed. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. template <typename MaterializationUnitType> Error define(std::unique_ptr<MaterializationUnitType> &&MU, ResourceTrackerSP RT = nullptr); @@ -1025,6 +1085,9 @@ public: /// generated. If an error occurs, ownership remains with the caller. This /// may allow the caller to modify the MaterializationUnit to correct the /// issue, then re-call define. 
+ /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. template <typename MaterializationUnitType> Error define(std::unique_ptr<MaterializationUnitType> &MU, ResourceTrackerSP RT = nullptr); @@ -1039,28 +1102,40 @@ public: /// /// On success, all symbols are removed. On failure, the JITDylib state is /// left unmodified (no symbols are removed). + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. Error remove(const SymbolNameSet &Names); - /// Dump current JITDylib state to OS. - void dump(raw_ostream &OS); - /// Returns the given JITDylibs and all of their transitive dependencies in /// DFS order (based on linkage relationships). Each JITDylib will appear /// only once. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. static std::vector<JITDylibSP> getDFSLinkOrder(ArrayRef<JITDylibSP> JDs); /// Returns the given JITDylibs and all of their transitive dependencies in /// reverse DFS order (based on linkage relationships). Each JITDylib will /// appear only once. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. static std::vector<JITDylibSP> getReverseDFSLinkOrder(ArrayRef<JITDylibSP> JDs); /// Return this JITDylib and its transitive dependencies in DFS order /// based on linkage relationships. + /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. std::vector<JITDylibSP> getDFSLinkOrder(); /// Return this JITDylib and its transitive dependencies in reverse DFS order /// based on linkage relationships. 
+ /// + /// It is illegal to call this method on a defunct JITDylib and the client + /// is responsible for ensuring that they do not do so. std::vector<JITDylibSP> getReverseDFSLinkOrder(); private: @@ -1151,7 +1226,6 @@ private: JITDylib(ExecutionSession &ES, std::string Name); - ResourceTrackerSP getTracker(MaterializationResponsibility &MR); std::pair<AsynchronousSymbolQuerySet, std::shared_ptr<SymbolDependenceMap>> removeTracker(ResourceTracker &RT); @@ -1197,8 +1271,8 @@ private: failSymbols(FailedSymbolsWorklist); ExecutionSession &ES; + enum { Open, Closing, Closed } State = Open; std::mutex GeneratorsMutex; - bool Open = true; SymbolTable Symbols; UnmaterializedInfosMap UnmaterializedInfos; MaterializingInfosMap MaterializingInfos; @@ -1208,7 +1282,8 @@ private: // Map trackers to sets of symbols tracked. DenseMap<ResourceTracker *, SymbolNameVector> TrackerSymbols; - DenseMap<MaterializationResponsibility *, ResourceTracker *> MRTrackers; + DenseMap<ResourceTracker *, DenseSet<MaterializationResponsibility *>> + TrackerMRs; }; /// Platforms set up standard symbols and mediate interactions between dynamic @@ -1363,6 +1438,18 @@ public: /// If no Platform is attached this call is equivalent to createBareJITDylib. Expected<JITDylib &> createJITDylib(std::string Name); + /// Closes the given JITDylib. + /// + /// This method clears all resources held for the JITDylib, puts it in the + /// closed state, and clears all references held by the ExecutionSession and + /// other JITDylibs. No further code can be added to the JITDylib, and the + /// object will be freed once any remaining JITDylibSPs to it are destroyed. + /// + /// This method does *not* run static destructors. + /// + /// This method can only be called once for each JITDylib. + Error removeJITDylib(JITDylib &JD); + /// Set the error reporter function. 
ExecutionSession &setErrorReporter(ErrorReporter ReportError) { this->ReportError = std::move(ReportError); @@ -1574,9 +1661,9 @@ private: SymbolStringPtr InitSymbol) { auto &JD = RT.getJITDylib(); std::unique_ptr<MaterializationResponsibility> MR( - new MaterializationResponsibility(&JD, std::move(Symbols), + new MaterializationResponsibility(&RT, std::move(Symbols), std::move(InitSymbol))); - JD.MRTrackers[MR.get()] = &RT; + JD.TrackerMRs[&RT].insert(MR.get()); return MR; } @@ -1660,18 +1747,17 @@ private: JITDispatchHandlers; }; -inline ExecutionSession &MaterializationResponsibility::getExecutionSession() { - return JD->getExecutionSession(); +inline ExecutionSession & +MaterializationResponsibility::getExecutionSession() const { + return JD.getExecutionSession(); } template <typename Func> Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const { - return JD->getExecutionSession().runSessionLocked([&]() -> Error { - auto I = JD->MRTrackers.find(this); - assert(I != JD->MRTrackers.end() && "No tracker for this MR"); - if (I->second->isDefunct()) - return make_error<ResourceTrackerDefunct>(I->second); - F(I->second->getKeyUnsafe()); + return JD.getExecutionSession().runSessionLocked([&]() -> Error { + if (RT->isDefunct()) + return make_error<ResourceTrackerDefunct>(RT); + F(RT->getKeyUnsafe()); return Error::success(); }); } @@ -1679,14 +1765,17 @@ Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const { template <typename GeneratorT> GeneratorT &JITDylib::addGenerator(std::unique_ptr<GeneratorT> DefGenerator) { auto &G = *DefGenerator; - std::lock_guard<std::mutex> Lock(GeneratorsMutex); - DefGenerators.push_back(std::move(DefGenerator)); + ES.runSessionLocked([&] { + assert(State == Open && "Cannot add generator to closed JITDylib"); + DefGenerators.push_back(std::move(DefGenerator)); + }); return G; } template <typename Func> auto JITDylib::withLinkOrderDo(Func &&F) -> decltype(F(std::declval<const JITDylibSearchOrder &>())) { + 
assert(State == Open && "Cannot use link order of closed JITDylib"); return ES.runSessionLocked([&]() { return F(LinkOrder); }); } @@ -1715,6 +1804,8 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU, }); return ES.runSessionLocked([&, this]() -> Error { + assert(State == Open && "JD is defunct"); + if (auto Err = defineImpl(*MU)) return Err; @@ -1756,6 +1847,8 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU, }); return ES.runSessionLocked([&, this]() -> Error { + assert(State == Open && "JD is defunct"); + if (auto Err = defineImpl(*MU)) return Err; @@ -1800,50 +1893,50 @@ private: // --------------------------------------------- inline MaterializationResponsibility::~MaterializationResponsibility() { - JD->getExecutionSession().OL_destroyMaterializationResponsibility(*this); + getExecutionSession().OL_destroyMaterializationResponsibility(*this); } inline SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const { - return JD->getExecutionSession().OL_getRequestedSymbols(*this); + return getExecutionSession().OL_getRequestedSymbols(*this); } inline Error MaterializationResponsibility::notifyResolved( const SymbolMap &Symbols) { - return JD->getExecutionSession().OL_notifyResolved(*this, Symbols); + return getExecutionSession().OL_notifyResolved(*this, Symbols); } inline Error MaterializationResponsibility::notifyEmitted() { - return JD->getExecutionSession().OL_notifyEmitted(*this); + return getExecutionSession().OL_notifyEmitted(*this); } inline Error MaterializationResponsibility::defineMaterializing( SymbolFlagsMap SymbolFlags) { - return JD->getExecutionSession().OL_defineMaterializing( - *this, std::move(SymbolFlags)); + return getExecutionSession().OL_defineMaterializing(*this, + std::move(SymbolFlags)); } inline void MaterializationResponsibility::failMaterialization() { - JD->getExecutionSession().OL_notifyFailed(*this); + getExecutionSession().OL_notifyFailed(*this); } inline Error 
MaterializationResponsibility::replace( std::unique_ptr<MaterializationUnit> MU) { - return JD->getExecutionSession().OL_replace(*this, std::move(MU)); + return getExecutionSession().OL_replace(*this, std::move(MU)); } inline Expected<std::unique_ptr<MaterializationResponsibility>> MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) { - return JD->getExecutionSession().OL_delegate(*this, Symbols); + return getExecutionSession().OL_delegate(*this, Symbols); } inline void MaterializationResponsibility::addDependencies( const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies) { - JD->getExecutionSession().OL_addDependencies(*this, Name, Dependencies); + getExecutionSession().OL_addDependencies(*this, Name, Dependencies); } inline void MaterializationResponsibility::addDependenciesForAll( const SymbolDependenceMap &Dependencies) { - JD->getExecutionSession().OL_addDependenciesForAll(*this, Dependencies); + getExecutionSession().OL_addDependenciesForAll(*this, Dependencies); } } // End namespace orc diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index 2fec3e7e4230..d2f9bac16e5a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -120,6 +120,10 @@ enum class OMPScheduleType { Runtime = 37, Auto = 38, // auto + StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd) + GuidedSimd = 46, // guided with chunk adjustment + RuntimeSimd = 47, // runtime with chunk adjustment + ModifierMonotonic = (1 << 29), // Set if the monotonic schedule modifier was present ModifierNonmonotonic = diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index b4e099e4ec20..bcf52278ccbb 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -1670,32 +1670,6 @@ public: return CreateAlignedLoad(Ty, Ptr, MaybeAlign(), isVolatile, Name); } - // Deprecated [opaque 
pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr, - const char *Name), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name); - } - - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr, - const Twine &Name = ""), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name); - } - - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr, - bool isVolatile, - const Twine &Name = ""), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, isVolatile, - Name); - } - StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) { return CreateAlignedStore(Val, Ptr, MaybeAlign(), isVolatile); } @@ -1719,35 +1693,6 @@ public: return Insert(new LoadInst(Ty, Ptr, Twine(), isVolatile, *Align), Name); } - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr, - MaybeAlign Align, - const char *Name), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr, - Align, Name); - } - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr, - MaybeAlign Align, - const Twine &Name = ""), - "Use the version that explicitly specifies the " - "loaded type instead") { - return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr, - Align, Name); - } - // Deprecated [opaque pointer types] - LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr, - MaybeAlign Align, - bool isVolatile, - const Twine &Name = ""), - "Use the version that explicitly specifies the " - "loaded type instead") { - 
return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr, - Align, isVolatile, Name); - } - StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile = false) { if (!Align) { @@ -1788,14 +1733,6 @@ public: return Insert(new AtomicRMWInst(Op, Ptr, Val, *Align, Ordering, SSID)); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateGEP(Value *Ptr, ArrayRef<Value *> IdxList, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateGEP(Ptr->getType()->getScalarType()->getPointerElementType(), - Ptr, IdxList, Name); - } - Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "") { if (auto *PC = dyn_cast<Constant>(Ptr)) { @@ -1810,15 +1747,6 @@ public: return Insert(GetElementPtrInst::Create(Ty, Ptr, IdxList), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateInBoundsGEP( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList, - Name); - } - Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "") { if (auto *PC = dyn_cast<Constant>(Ptr)) { @@ -1849,15 +1777,6 @@ public: return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstGEP1_32( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Name); - } - Value *CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name = "") { Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0); @@ -1914,15 +1833,6 @@ public: return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, - const 
Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstGEP1_64( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Name); - } - Value *CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name = "") { Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0); @@ -1933,15 +1843,6 @@ public: return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstInBoundsGEP1_64( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Name); - } - Value *CreateConstGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name = "") { Value *Idxs[] = { @@ -1955,15 +1856,6 @@ public: return Insert(GetElementPtrInst::Create(Ty, Ptr, Idxs), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1, - const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstGEP2_64( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Idx1, Name); - } - Value *CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name = "") { Value *Idxs[] = { @@ -1977,28 +1869,11 @@ public: return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name); } - LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, - uint64_t Idx1, const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstInBoundsGEP2_64( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0, - Idx1, Name); - } - Value *CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name = "") { return CreateConstInBoundsGEP2_32(Ty, Ptr, 0, Idx, Name); } - 
LLVM_ATTRIBUTE_DEPRECATED( - Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = ""), - "Use the version with explicit element type instead") { - return CreateConstInBoundsGEP2_32( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, 0, Idx, - Name); - } - /// Same as CreateGlobalString, but return a pointer with "i8*" type /// instead of a pointer to array of i8. /// diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 6d32a898b668..046e9b5e809e 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -975,15 +975,6 @@ public: NameStr, InsertAtEnd); } - LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds( - Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr = "", - Instruction *InsertBefore = nullptr), - "Use the version with explicit element type instead") { - return CreateInBounds( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList, - NameStr, InsertBefore); - } - /// Create an "inbounds" getelementptr. See the documentation for the /// "inbounds" flag in LangRef.html for details. 
static GetElementPtrInst * @@ -996,15 +987,6 @@ public: return GEP; } - LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds( - Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr, - BasicBlock *InsertAtEnd), - "Use the version with explicit element type instead") { - return CreateInBounds( - Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList, - NameStr, InsertAtEnd); - } - static GetElementPtrInst *CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr, diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 8290342c0d51..b01fa10763b8 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -524,6 +524,20 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">, Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>; + // BCD intrinsics. + def int_ppc_bcdadd : GCCBuiltin<"__builtin_ppc_bcdadd">, Intrinsic< + [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_ppc_bcdadd_p : GCCBuiltin<"__builtin_ppc_bcdadd_p">, Intrinsic< + [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, ImmArg<ArgIndex<0>>]>; + def int_ppc_bcdsub : GCCBuiltin<"__builtin_ppc_bcdsub">, Intrinsic< + [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_ppc_bcdsub_p : GCCBuiltin<"__builtin_ppc_bcdsub_p">, Intrinsic< + [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, ImmArg<ArgIndex<0>>]>; + // P10 Vector Extract with Mask def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; @@ -1073,6 +1087,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
GCCBuiltin<"__builtin_altivec_crypto_vpermxor">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_crypto_vpermxor_be : + GCCBuiltin<"__builtin_altivec_crypto_vpermxor_be">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_crypto_vshasigmad : GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">, diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index b83d83f0d0ab..7d232bba0864 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -250,8 +250,16 @@ public: bool operator!=(const FastMathFlags &OtherFlags) const { return Flags != OtherFlags.Flags; } + + /// Print fast-math flags to \p O. + void print(raw_ostream &O) const; }; +inline raw_ostream &operator<<(raw_ostream &O, FastMathFlags FMF) { + FMF.print(O); + return O; +} + /// Utility class for floating point operations which can have /// information about relaxed accuracy requirements attached to them. class FPMathOperator : public Operator { diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index b858733530e3..320deb80bb1f 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -2285,6 +2285,31 @@ m_Not(const ValTy &V) { return m_c_Xor(V, m_AllOnes()); } +template <typename ValTy> struct NotForbidUndef_match { + ValTy Val; + NotForbidUndef_match(const ValTy &V) : Val(V) {} + + template <typename OpTy> bool match(OpTy *V) { + // We do not use m_c_Xor because that could match an arbitrary APInt that is + // not -1 as C and then fail to match the other operand if it is -1. + // This code should still work even when both operands are constants. 
+ Value *X; + const APInt *C; + if (m_Xor(m_Value(X), m_APIntForbidUndef(C)).match(V) && C->isAllOnes()) + return Val.match(X); + if (m_Xor(m_APIntForbidUndef(C), m_Value(X)).match(V) && C->isAllOnes()) + return Val.match(X); + return false; + } +}; + +/// Matches a bitwise 'not' as 'xor V, -1' or 'xor -1, V'. For vectors, the +/// constant value must be composed of only -1 scalar elements. +template <typename ValTy> +inline NotForbidUndef_match<ValTy> m_NotForbidUndef(const ValTy &V) { + return NotForbidUndef_match<ValTy>(V); +} + /// Matches an SMin with LHS and RHS in either order. template <typename LHS, typename RHS> inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true> diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index 47431adc6fac..c899c46d4055 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -368,6 +368,8 @@ public: Type *getPointerElementType() const { assert(getTypeID() == PointerTyID); + assert(NumContainedTys && + "Attempting to get element type of opaque pointer"); return ContainedTys[0]; } diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index 361d6357b303..a3c6b4e70bf5 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -38,7 +38,7 @@ // is one VP intrinsic that maps directly to one SDNode that goes by the // same name. Since the operands are also the same, we open the property // scopes for both the VPIntrinsic and the SDNode at once. -// \p SDOPC The SelectionDAG Node id (eg VP_ADD). +// \p VPSD The SelectionDAG Node id (eg VP_ADD). // \p LEGALPOS The operand position of the SDNode that is used for legalizing // this SDNode. This can be `-1`, in which case the return type of // the SDNode is used. @@ -46,12 +46,12 @@ // \p MASKPOS The mask operand position. // \p EVLPOS The explicit vector length operand position. 
#ifndef BEGIN_REGISTER_VP_SDNODE -#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) +#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) #endif // End the property scope of a new VP SDNode. #ifndef END_REGISTER_VP_SDNODE -#define END_REGISTER_VP_SDNODE(SDOPC) +#define END_REGISTER_VP_SDNODE(VPSD) #endif // Helper macros for the common "1:1 - Intrinsic : SDNode" case. @@ -60,22 +60,21 @@ // same name. Since the operands are also the same, we open the property // scopes for both the VPIntrinsic and the SDNode at once. // -// \p INTRIN The canonical name (eg `vp_add`, which at the same time is the +// \p VPID The canonical name (eg `vp_add`, which at the same time is the // name of the intrinsic and the TableGen def of the SDNode). // \p MASKPOS The mask operand position. // \p EVLPOS The explicit vector length operand position. -// \p SDOPC The SelectionDAG Node id (eg VP_ADD). +// \p VPSD The SelectionDAG Node id (eg VP_ADD). // \p LEGALPOS The operand position of the SDNode that is used for legalizing // this SDNode. This can be `-1`, in which case the return type of // the SDNode is used. -#define BEGIN_REGISTER_VP(INTRIN, MASKPOS, EVLPOS, SDOPC, LEGALPOS) \ -BEGIN_REGISTER_VP_INTRINSIC(INTRIN, MASKPOS, EVLPOS) \ -BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, INTRIN, MASKPOS, EVLPOS) - -#define END_REGISTER_VP(INTRIN, SDOPC) \ -END_REGISTER_VP_INTRINSIC(INTRIN) \ -END_REGISTER_VP_SDNODE(SDOPC) +#define BEGIN_REGISTER_VP(VPID, MASKPOS, EVLPOS, VPSD, LEGALPOS) \ + BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS) \ + BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, VPID, MASKPOS, EVLPOS) +#define END_REGISTER_VP(VPID, VPSD) \ + END_REGISTER_VP_INTRINSIC(VPID) \ + END_REGISTER_VP_SDNODE(VPSD) // The following macros attach properties to the scope they are placed in. 
This // assigns the property to the VP Intrinsic and/or SDNode that belongs to the @@ -84,9 +83,9 @@ END_REGISTER_VP_SDNODE(SDOPC) // Property Macros { // The intrinsic and/or SDNode has the same function as this LLVM IR Opcode. -// \p OPC The standard IR opcode. -#ifndef HANDLE_VP_TO_OPC -#define HANDLE_VP_TO_OPC(OPC) +// \p OPC The opcode of the instruction with the same function. +#ifndef VP_PROPERTY_FUNCTIONAL_OPC +#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) #endif // Whether the intrinsic may have a rounding mode or exception behavior operand @@ -96,34 +95,30 @@ END_REGISTER_VP_SDNODE(SDOPC) // \p HASEXCEPT '1' if the intrinsic can have an exception behavior operand // bundle, '0' otherwise. // \p INTRINID The constrained fp intrinsic this VP intrinsic corresponds to. -#ifndef HANDLE_VP_TO_CONSTRAINEDFP -#define HANDLE_VP_TO_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID) +#ifndef VP_PROPERTY_CONSTRAINEDFP +#define VP_PROPERTY_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID) #endif // Map this VP intrinsic to its canonical functional intrinsic. -#ifndef HANDLE_VP_TO_INTRIN -#define HANDLE_VP_TO_INTRIN(ID) +// \p INTRIN The non-VP intrinsics with the same function. +#ifndef VP_PROPERTY_FUNCTIONAL_INTRINSIC +#define VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) #endif // This VP Intrinsic is a memory operation // The pointer arg is at POINTERPOS and the data arg is at DATAPOS. -#ifndef HANDLE_VP_IS_MEMOP -#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) +#ifndef VP_PROPERTY_MEMOP +#define VP_PROPERTY_MEMOP(POINTERPOS, DATAPOS) #endif // Map this VP reduction intrinsic to its reduction operand positions. -#ifndef HANDLE_VP_REDUCTION -#define HANDLE_VP_REDUCTION(ID, STARTPOS, VECTORPOS) +#ifndef VP_PROPERTY_REDUCTION +#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) #endif // A property to infer VP binary-op SDNode opcodes automatically. 
-#ifndef PROPERTY_VP_BINARYOP_SDNODE -#define PROPERTY_VP_BINARYOP_SDNODE(ID) -#endif - -// A property to infer VP reduction SDNode opcodes automatically. -#ifndef PROPERTY_VP_REDUCTION_SDNODE -#define PROPERTY_VP_REDUCTION_SDNODE(ID) +#ifndef VP_PROPERTY_BINARYOP +#define VP_PROPERTY_BINARYOP #endif /// } Property Macros @@ -132,15 +127,14 @@ END_REGISTER_VP_SDNODE(SDOPC) // Specialized helper macro for integer binary operators (%x, %y, %mask, %evl). #ifdef HELPER_REGISTER_BINARY_INT_VP -#error "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!" +#error \ + "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!" #endif -#define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDOPC, OPC) \ -BEGIN_REGISTER_VP(INTRIN, 2, 3, SDOPC, -1) \ -HANDLE_VP_TO_OPC(OPC) \ -PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ -END_REGISTER_VP(INTRIN, SDOPC) - - +#define HELPER_REGISTER_BINARY_INT_VP(VPID, VPSD, IROPC) \ + BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, -1) \ + VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \ + VP_PROPERTY_BINARYOP \ + END_REGISTER_VP(VPID, VPSD) // llvm.vp.add(x,y,mask,vlen) HELPER_REGISTER_BINARY_INT_VP(vp_add, VP_ADD, Add) @@ -193,12 +187,12 @@ HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor) #error \ "The internal helper macro HELPER_REGISTER_BINARY_FP_VP is already defined!" 
#endif -#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, SDOPC, OPC) \ - BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, SDOPC, -1) \ - HANDLE_VP_TO_OPC(OPC) \ - HANDLE_VP_TO_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \ - PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ - END_REGISTER_VP(vp_##OPSUFFIX, SDOPC) +#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, VPSD, IROPC) \ + BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, VPSD, -1) \ + VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \ + VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \ + VP_PROPERTY_BINARYOP \ + END_REGISTER_VP(vp_##OPSUFFIX, VPSD) // llvm.vp.fadd(x,y,mask,vlen) HELPER_REGISTER_BINARY_FP_VP(fadd, VP_FADD, FAdd) @@ -224,34 +218,34 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem) BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3) // chain = VP_STORE chain,val,base,offset,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_STORE, 0, vp_store, 4, 5) -HANDLE_VP_TO_OPC(Store) -HANDLE_VP_TO_INTRIN(masked_store) -HANDLE_VP_IS_MEMOP(vp_store, 1, 0) +VP_PROPERTY_FUNCTIONAL_OPC(Store) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_store) +VP_PROPERTY_MEMOP(1, 0) END_REGISTER_VP(vp_store, VP_STORE) // llvm.vp.scatter(ptr,val,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3) // chain = VP_SCATTER chain,val,base,indices,scale,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, -1, vp_scatter, 5, 6) -HANDLE_VP_TO_INTRIN(masked_scatter) -HANDLE_VP_IS_MEMOP(vp_scatter, 1, 0) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_scatter) +VP_PROPERTY_MEMOP(1, 0) END_REGISTER_VP(vp_scatter, VP_SCATTER) // llvm.vp.load(ptr,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(vp_load, 1, 2) // val,chain = VP_LOAD chain,base,offset,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_LOAD, -1, vp_load, 3, 4) -HANDLE_VP_TO_OPC(Load) -HANDLE_VP_TO_INTRIN(masked_load) -HANDLE_VP_IS_MEMOP(vp_load, 0, None) +VP_PROPERTY_FUNCTIONAL_OPC(Load) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load) +VP_PROPERTY_MEMOP(0, None) END_REGISTER_VP(vp_load, VP_LOAD) // llvm.vp.gather(ptr,mask,vlen) 
BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2) // val,chain = VP_GATHER chain,base,indices,scale,mask,evl BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5) -HANDLE_VP_TO_INTRIN(masked_gather) -HANDLE_VP_IS_MEMOP(vp_gather, 0, None) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_gather) +VP_PROPERTY_MEMOP(0, None) END_REGISTER_VP(vp_gather, VP_GATHER) ///// } Memory Operations @@ -260,14 +254,14 @@ END_REGISTER_VP(vp_gather, VP_GATHER) // Specialized helper macro for VP reductions (%start, %x, %mask, %evl). #ifdef HELPER_REGISTER_REDUCTION_VP -#error "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!" +#error \ + "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!" #endif -#define HELPER_REGISTER_REDUCTION_VP(VPINTRIN, SDOPC, INTRIN) \ -BEGIN_REGISTER_VP(VPINTRIN, 2, 3, SDOPC, -1) \ -HANDLE_VP_TO_INTRIN(INTRIN) \ -HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \ -PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ -END_REGISTER_VP(VPINTRIN, SDOPC) +#define HELPER_REGISTER_REDUCTION_VP(VPID, VPSD, INTRIN) \ + BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, -1) \ + VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \ + VP_PROPERTY_REDUCTION(0, 1) \ + END_REGISTER_VP(VPID, VPSD) // llvm.vp.reduce.add(start,x,mask,vlen) HELPER_REGISTER_REDUCTION_VP(vp_reduce_add, VP_REDUCE_ADD, @@ -320,19 +314,19 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN, // fast-math flags in the IR and as two distinct ISD opcodes in the // SelectionDAG. #ifdef HELPER_REGISTER_REDUCTION_SEQ_VP -#error "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!" +#error \ + "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!" 
#endif -#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPINTRIN, SDOPC, SEQ_SDOPC, INTRIN) \ -BEGIN_REGISTER_VP_INTRINSIC(VPINTRIN, 2, 3) \ -BEGIN_REGISTER_VP_SDNODE(SDOPC, -1, VPINTRIN, 2, 3) \ -END_REGISTER_VP_SDNODE(SDOPC) \ -BEGIN_REGISTER_VP_SDNODE(SEQ_SDOPC, -1, VPINTRIN, 2, 3) \ -END_REGISTER_VP_SDNODE(SEQ_SDOPC) \ -HANDLE_VP_TO_INTRIN(INTRIN) \ -HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \ -PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ -PROPERTY_VP_REDUCTION_SDNODE(SEQ_SDOPC) \ -END_REGISTER_VP_INTRINSIC(VPINTRIN) +#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPID, VPSD, SEQ_VPSD, INTRIN) \ + BEGIN_REGISTER_VP_INTRINSIC(VPID, 2, 3) \ + BEGIN_REGISTER_VP_SDNODE(VPSD, -1, VPID, 2, 3) \ + VP_PROPERTY_REDUCTION(0, 1) \ + END_REGISTER_VP_SDNODE(VPSD) \ + BEGIN_REGISTER_VP_SDNODE(SEQ_VPSD, -1, VPID, 2, 3) \ + VP_PROPERTY_REDUCTION(0, 1) \ + END_REGISTER_VP_SDNODE(SEQ_VPSD) \ + VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \ + END_REGISTER_VP_INTRINSIC(VPID) // llvm.vp.reduce.fadd(start,x,mask,vlen) HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fadd, VP_REDUCE_FADD, @@ -356,8 +350,7 @@ BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3) // END_REGISTER_CASES(vp_select, VP_SELECT) END_REGISTER_VP_INTRINSIC(vp_select) -BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, - EXPERIMENTAL_VP_SPLICE, -1) +BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1) END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE) ///// } Shuffles @@ -368,10 +361,9 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE) #undef END_REGISTER_VP #undef END_REGISTER_VP_INTRINSIC #undef END_REGISTER_VP_SDNODE -#undef HANDLE_VP_TO_OPC -#undef HANDLE_VP_TO_CONSTRAINEDFP -#undef HANDLE_VP_TO_INTRIN -#undef HANDLE_VP_IS_MEMOP -#undef HANDLE_VP_REDUCTION -#undef PROPERTY_VP_BINARYOP_SDNODE -#undef PROPERTY_VP_REDUCTION_SDNODE +#undef VP_PROPERTY_BINARYOP +#undef VP_PROPERTY_CONSTRAINEDFP +#undef VP_PROPERTY_FUNCTIONAL_INTRINSIC +#undef VP_PROPERTY_FUNCTIONAL_OPC +#undef VP_PROPERTY_MEMOP +#undef 
VP_PROPERTY_REDUCTION diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h index f4381d2ae4a9..52a4c7b4301f 100644 --- a/llvm/include/llvm/IR/Verifier.h +++ b/llvm/include/llvm/IR/Verifier.h @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// // -// This file defines the function verifier interface, that can be used for some -// sanity checking of input to the system, and for checking that transformations -// haven't done something bad. +// This file defines the function verifier interface, that can be used for +// validation checking of input to the system, and for checking that +// transformations haven't done something bad. // // Note that this does not provide full 'java style' security and verifications, // instead it just tries to ensure that code is well formed. diff --git a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h index 2b0f391570cd..8c0ad2699b8d 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h +++ b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h @@ -264,9 +264,10 @@ public: // Update the ready queues. void dump() const; - // This routine performs a sanity check. This routine should only be called - // when we know that 'IR' is not in the scheduler's instruction queues. - void sanityCheck(const InstRef &IR) const { + // This routine performs a basic correctness check. This routine should only + // be called when we know that 'IR' is not in the scheduler's instruction + // queues. 
+ void instructionCheck(const InstRef &IR) const { assert(!is_contained(WaitSet, IR) && "Already in the wait set!"); assert(!is_contained(ReadySet, IR) && "Already in the ready set!"); assert(!is_contained(IssuedSet, IR) && "Already executing!"); diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h index ee89f4eac61f..38a7de3d6131 100644 --- a/llvm/include/llvm/ObjectYAML/MachOYAML.h +++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h @@ -121,6 +121,7 @@ struct LinkEditData { MachOYAML::ExportEntry ExportTrie; std::vector<NListEntry> NameList; std::vector<StringRef> StringTable; + std::vector<yaml::Hex32> IndirectSymbols; bool isEmpty() const; }; diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc new file mode 100644 index 000000000000..d64227e4ba31 --- /dev/null +++ b/llvm/include/llvm/ProfileData/MemProfData.inc @@ -0,0 +1,61 @@ +#ifndef MEMPROF_DATA_INC +#define MEMPROF_DATA_INC +/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\ +|* +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +|* See https://llvm.org/LICENSE.txt for license information. +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the main file that defines all the data structures, signatures, and + * constant literals that are shared across the profiling runtime library + * and host tools (reader/writer). + * + * This file has two identical copies. The primary copy lives in LLVM and + * the other one sits in compiler-rt/include/profile directory. To make changes + * in this file, first modify the primary copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. 
+ * +\*===----------------------------------------------------------------------===*/ + + +#ifdef _MSC_VER +#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop)) +#else +#define PACKED(__decl__) __decl__ __attribute__((__packed__)) +#endif + +// A 64-bit magic number to uniquely identify the raw binary memprof profile file. +#define MEMPROF_RAW_MAGIC_64 \ + ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \ + (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129) + +// The version number of the raw binary format. +#define MEMPROF_RAW_VERSION 1ULL + +namespace llvm { +namespace memprof { +// A struct describing the header used for the raw binary memprof profile format. +PACKED(struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t TotalSize; + uint64_t SegmentOffset; + uint64_t MIBOffset; + uint64_t StackOffset; +}); + +// A struct describing the information necessary to describe a /proc/maps +// segment entry for a particular binary/library identified by its build id. +PACKED(struct SegmentEntry { + uint64_t Start; + uint64_t End; + uint64_t Offset; + uint8_t BuildId[32]; +}); +} // namespace memprof +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h new file mode 100644 index 000000000000..45544927a86f --- /dev/null +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -0,0 +1,43 @@ +#ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ +#define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ +//===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading MemProf profiling data. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace llvm { +namespace memprof { + +class RawMemProfReader { +public: + RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) + : DataBuffer(std::move(DataBuffer)) {} + // Prints aggregate counts for each raw profile parsed from the DataBuffer. + void printSummaries(raw_ostream &OS) const; + + // Return true if the \p DataBuffer starts with magic bytes indicating it is + // a raw binary memprof profile. + static bool hasFormat(const MemoryBuffer &DataBuffer); + + // Create a RawMemProfReader after sanity checking the contents of the file at + // \p Path. + static Expected<std::unique_ptr<RawMemProfReader>> create(const Twine &Path); + +private: + std::unique_ptr<MemoryBuffer> DataBuffer; +}; + +} // namespace memprof +} // namespace llvm + +#endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_ diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index b3cfb71601f1..48e82fa55a0f 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -181,7 +181,8 @@ AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false, (AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM | AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML | - AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16)) + AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM | + AArch64::AEK_BF16)) AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_LSE)) AARCH64_CPU_NAME("cortex-x1", 
ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h index 131a58412db6..15bb428f19bc 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -137,15 +137,6 @@ void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values); bool isX18ReservedByDefault(const Triple &TT); -struct ParsedBranchProtection { - StringRef Scope; - StringRef Key; - bool BranchTargetEnforcement; -}; - -bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, - StringRef &Err); - } // namespace AArch64 } // namespace llvm diff --git a/llvm/include/llvm/Support/ARMAttributeParser.h b/llvm/include/llvm/Support/ARMAttributeParser.h index 5d12b7e08d58..b46a4d9f690f 100644 --- a/llvm/include/llvm/Support/ARMAttributeParser.h +++ b/llvm/include/llvm/Support/ARMAttributeParser.h @@ -67,6 +67,10 @@ class ARMAttributeParser : public ELFAttributeParser { Error DSP_extension(ARMBuildAttrs::AttrType tag); Error T2EE_use(ARMBuildAttrs::AttrType tag); Error Virtualization_use(ARMBuildAttrs::AttrType tag); + Error PAC_extension(ARMBuildAttrs::AttrType tag); + Error BTI_extension(ARMBuildAttrs::AttrType tag); + Error PACRET_use(ARMBuildAttrs::AttrType tag); + Error BTI_use(ARMBuildAttrs::AttrType tag); Error nodefaults(ARMBuildAttrs::AttrType tag); public: diff --git a/llvm/include/llvm/Support/ARMBuildAttributes.h b/llvm/include/llvm/Support/ARMBuildAttributes.h index 37c37522fd26..b4405e7d4908 100644 --- a/llvm/include/llvm/Support/ARMBuildAttributes.h +++ b/llvm/include/llvm/Support/ARMBuildAttributes.h @@ -70,9 +70,13 @@ enum AttrType : unsigned { DIV_use = 44, DSP_extension = 46, MVE_arch = 48, + PAC_extension = 50, + BTI_extension = 52, also_compatible_with = 65, conformance = 67, Virtualization_use = 68, + BTI_use = 74, + PACRET_use = 76, /// Legacy Tags Section = 2, // deprecated (ABI r2.09) @@ -237,7 +241,25 @@ enum { // 
Tag_Virtualization_use, (=68), uleb128 AllowTZ = 1, AllowVirtualization = 2, - AllowTZVirtualization = 3 + AllowTZVirtualization = 3, + + // Tag_PAC_extension, (=50), uleb128 + DisallowPAC = 0, + AllowPACInNOPSpace = 1, + AllowPAC = 2, + + // Tag_BTI_extension, (=52), uleb128 + DisallowBTI = 0, + AllowBTIInNOPSpace = 1, + AllowBTI = 2, + + // Tag_BTI_use, (=74), uleb128 + BTINotUsed = 0, + BTIUsed = 1, + + // Tag_PACRET_use, (=76), uleb128 + PACRETNotUsed = 0, + PACRETUsed = 1 }; } // namespace ARMBuildAttrs diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index fd08f3e6960c..7d29808f0501 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -201,6 +201,7 @@ ARM_ARCH_EXT_NAME("cdecp4", ARM::AEK_CDECP4, "+cdecp4", "-cdecp4") ARM_ARCH_EXT_NAME("cdecp5", ARM::AEK_CDECP5, "+cdecp5", "-cdecp5") ARM_ARCH_EXT_NAME("cdecp6", ARM::AEK_CDECP6, "+cdecp6", "-cdecp6") ARM_ARCH_EXT_NAME("cdecp7", ARM::AEK_CDECP7, "+cdecp7", "-cdecp7") +ARM_ARCH_EXT_NAME("pacbti", ARM::AEK_PACBTI, "+pacbti", "-pacbti") #undef ARM_ARCH_EXT_NAME #ifndef ARM_HW_DIV_NAME diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h index b1ffcfb34552..b40704c24e87 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.h +++ b/llvm/include/llvm/Support/ARMTargetParser.h @@ -59,7 +59,7 @@ enum ArchExtKind : uint64_t { AEK_CDECP5 = 1 << 27, AEK_CDECP6 = 1 << 28, AEK_CDECP7 = 1 << 29, - + AEK_PACBTI = 1 << 30, // Unsupported extensions. AEK_OS = 1ULL << 59, AEK_IWMMXT = 1ULL << 60, diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h index 21fd50763b1f..f39400c26eab 100644 --- a/llvm/include/llvm/Support/GenericDomTree.h +++ b/llvm/include/llvm/Support/GenericDomTree.h @@ -528,9 +528,9 @@ protected: /// of CFG edges must not delete the CFG nodes before calling this function. 
/// /// The applyUpdates function can reorder the updates and remove redundant - /// ones internally. The batch updater is also able to detect sequences of - /// zero and exactly one update -- it's optimized to do less work in these - /// cases. + /// ones internally (as long as it is done in a deterministic fashion). The + /// batch updater is also able to detect sequences of zero and exactly one + /// update -- it's optimized to do less work in these cases. /// /// Note that for postdominators it automatically takes care of applying /// updates on reverse edges internally (so there's no need to swap the @@ -538,8 +538,8 @@ protected: /// The type of updates is the same for DomTreeBase<T> and PostDomTreeBase<T> /// with the same template parameter T. /// - /// \param Updates An unordered sequence of updates to perform. The current - /// CFG and the reverse of these updates provides the pre-view of the CFG. + /// \param Updates An ordered sequence of updates to perform. The current CFG + /// and the reverse of these updates provides the pre-view of the CFG. /// void applyUpdates(ArrayRef<UpdateType> Updates) { GraphDiff<NodePtr, IsPostDominator> PreViewCFG( @@ -547,9 +547,9 @@ protected: DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, nullptr); } - /// \param Updates An unordered sequence of updates to perform. The current - /// CFG and the reverse of these updates provides the pre-view of the CFG. - /// \param PostViewUpdates An unordered sequence of update to perform in order + /// \param Updates An ordered sequence of updates to perform. The current CFG + /// and the reverse of these updates provides the pre-view of the CFG. + /// \param PostViewUpdates An ordered sequence of update to perform in order /// to obtain a post-view of the CFG. The DT will be updated assuming the /// obtained PostViewCFG is the desired end state. 
void applyUpdates(ArrayRef<UpdateType> Updates, diff --git a/llvm/include/llvm/Support/HTTPClient.h b/llvm/include/llvm/Support/HTTPClient.h new file mode 100644 index 000000000000..3172610c2d8b --- /dev/null +++ b/llvm/include/llvm/Support/HTTPClient.h @@ -0,0 +1,113 @@ +//===-- llvm/Support/HTTPClient.h - HTTP client library ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declarations of the HTTPClient, HTTPMethod, +/// HTTPResponseHandler, and BufferedHTTPResponseHandler classes, as well as +/// the HTTPResponseBuffer and HTTPRequest structs. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_HTTP_CLIENT_H +#define LLVM_SUPPORT_HTTP_CLIENT_H + +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBuffer.h" + +namespace llvm { + +enum class HTTPMethod { GET }; + +/// A stateless description of an outbound HTTP request. +struct HTTPRequest { + SmallString<128> Url; + HTTPMethod Method = HTTPMethod::GET; + bool FollowRedirects = true; + HTTPRequest(StringRef Url); +}; + +bool operator==(const HTTPRequest &A, const HTTPRequest &B); + +/// A handler for state updates occurring while an HTTPRequest is performed. +/// Can trigger the client to abort the request by returning an Error from any +/// of its methods. +class HTTPResponseHandler { +public: + /// Processes one line of HTTP response headers. + virtual Error handleHeaderLine(StringRef HeaderLine) = 0; + + /// Processes an additional chunk of bytes of the HTTP response body. + virtual Error handleBodyChunk(StringRef BodyChunk) = 0; + + /// Processes the HTTP response status code. 
+ virtual Error handleStatusCode(unsigned Code) = 0; + +protected: + ~HTTPResponseHandler(); +}; + +/// An HTTP response status code bundled with a buffer to store the body. +struct HTTPResponseBuffer { + unsigned Code = 0; + std::unique_ptr<WritableMemoryBuffer> Body; +}; + +/// A simple handler which writes returned data to an HTTPResponseBuffer. +/// Ignores all headers except the Content-Length, which it uses to +/// allocate an appropriately-sized Body buffer. +class BufferedHTTPResponseHandler final : public HTTPResponseHandler { + size_t Offset = 0; + +public: + /// Stores the data received from the HTTP server. + HTTPResponseBuffer ResponseBuffer; + + /// These callbacks store the body and status code in an HTTPResponseBuffer + /// allocated based on Content-Length. The Content-Length header must be + /// handled by handleHeaderLine before any calls to handleBodyChunk. + Error handleHeaderLine(StringRef HeaderLine) override; + Error handleBodyChunk(StringRef BodyChunk) override; + Error handleStatusCode(unsigned Code) override; +}; + +/// A reusable client that can perform HTTPRequests through a network socket. +class HTTPClient { +public: + HTTPClient(); + ~HTTPClient(); + + /// Returns true only if LLVM has been compiled with a working HTTPClient. + static bool isAvailable(); + + /// Must be called at the beginning of a program, while it is a single thread. + static void initialize(); + + /// Must be called at the end of a program, while it is a single thread. + static void cleanup(); + + /// Sets the timeout for the entire request, in milliseconds. A zero or + /// negative value means the request never times out. + void setTimeout(std::chrono::milliseconds Timeout); + + /// Performs the Request, passing response data to the Handler. Returns all + /// errors which occur during the request. Aborts if an error is returned by a + /// Handler method. 
+ Error perform(const HTTPRequest &Request, HTTPResponseHandler &Handler); + + /// Performs the Request with the default BufferedHTTPResponseHandler, and + /// returns its HTTPResponseBuffer or an Error. + Expected<HTTPResponseBuffer> perform(const HTTPRequest &Request); + + /// Performs an HTTPRequest with the default configuration to make a GET + /// request to the given Url. Returns an HTTPResponseBuffer or an Error. + Expected<HTTPResponseBuffer> get(StringRef Url); +}; + +} // end namespace llvm + +#endif // LLVM_SUPPORT_HTTP_CLIENT_H diff --git a/llvm/include/llvm/Support/Mutex.h b/llvm/include/llvm/Support/Mutex.h index 1d8a0d3c87cb..d73bb8ef1120 100644 --- a/llvm/include/llvm/Support/Mutex.h +++ b/llvm/include/llvm/Support/Mutex.h @@ -36,7 +36,7 @@ namespace llvm return true; } else { // Single-threaded debugging code. This would be racy in - // multithreaded mode, but provides not sanity checks in single + // multithreaded mode, but provides no basic checks in single // threaded mode. ++acquired; return true; @@ -49,7 +49,7 @@ namespace llvm return true; } else { // Single-threaded debugging code. This would be racy in - // multithreaded mode, but provides not sanity checks in single + // multithreaded mode, but provides no basic checks in single // threaded mode. assert(acquired && "Lock not acquired before release!"); --acquired; return true; diff --git a/llvm/include/llvm/Support/RWMutex.h b/llvm/include/llvm/Support/RWMutex.h index 150bc7dbbce1..33a5d3efffee 100644 --- a/llvm/include/llvm/Support/RWMutex.h +++ b/llvm/include/llvm/Support/RWMutex.h @@ -114,7 +114,7 @@ public: } // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. + // mode, but provides no basic checks in single threaded mode. ++readers; return true; } @@ -126,7 +126,7 @@ public: } // Single-threaded debugging code. 
This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. + // mode, but provides no basic checks in single threaded mode. assert(readers > 0 && "Reader lock not acquired before release!"); --readers; return true; @@ -139,7 +139,7 @@ public: } // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. + // mode, but provides no basic checks in single threaded mode. assert(writers == 0 && "Writer lock already acquired!"); ++writers; return true; @@ -152,7 +152,7 @@ public: } // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. + // mode, but provides no basic checks in single threaded mode. assert(writers == 1 && "Writer lock not acquired before release!"); --writers; return true; diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index 366dd3cf55c6..b11467dcce28 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -177,6 +177,18 @@ StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64); } // namespace RISCV +namespace ARM { +struct ParsedBranchProtection { + StringRef Scope; + StringRef Key; + bool BranchTargetEnforcement; +}; + +bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP, + StringRef &Err); + +} // namespace ARM + } // namespace llvm #endif diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h index 4c41b88d6043..8d30e8e92755 100644 --- a/llvm/include/llvm/Support/ThreadPool.h +++ b/llvm/include/llvm/Support/ThreadPool.h @@ -36,9 +36,6 @@ namespace llvm { /// for some work to become available. 
class ThreadPool { public: - using TaskTy = std::function<void()>; - using PackagedTaskTy = std::packaged_task<void()>; - /// Construct a pool using the hardware strategy \p S for mapping hardware /// execution resources (threads, cores, CPUs) /// Defaults to using the maximum execution resources in the system, but @@ -51,17 +48,17 @@ public: /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. template <typename Function, typename... Args> - inline std::shared_future<void> async(Function &&F, Args &&... ArgList) { + inline auto async(Function &&F, Args &&...ArgList) { auto Task = std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...); - return asyncImpl(std::move(Task)); + return async(std::move(Task)); } /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. - template <typename Function> - inline std::shared_future<void> async(Function &&F) { - return asyncImpl(std::forward<Function>(F)); + template <typename Func> + auto async(Func &&F) -> std::shared_future<decltype(F())> { + return asyncImpl(std::function<decltype(F())()>(std::forward<Func>(F))); } /// Blocking wait for all the threads to complete and the queue to be empty. @@ -74,17 +71,70 @@ public: bool isWorkerThread() const; private: + /// Helpers to create a promise and a callable wrapper of \p Task that sets + /// the result of the promise. Returns the callable and a future to access the + /// result. 
+ template <typename ResTy> + static std::pair<std::function<void()>, std::future<ResTy>> + createTaskAndFuture(std::function<ResTy()> Task) { + std::shared_ptr<std::promise<ResTy>> Promise = + std::make_shared<std::promise<ResTy>>(); + auto F = Promise->get_future(); + return { + [Promise = std::move(Promise), Task]() { Promise->set_value(Task()); }, + std::move(F)}; + } + static std::pair<std::function<void()>, std::future<void>> + createTaskAndFuture(std::function<void()> Task) { + std::shared_ptr<std::promise<void>> Promise = + std::make_shared<std::promise<void>>(); + auto F = Promise->get_future(); + return {[Promise = std::move(Promise), Task]() { + Task(); + Promise->set_value(); + }, + std::move(F)}; + } + bool workCompletedUnlocked() { return !ActiveThreads && Tasks.empty(); } /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. - std::shared_future<void> asyncImpl(TaskTy F); + template <typename ResTy> + std::shared_future<ResTy> asyncImpl(std::function<ResTy()> Task) { + +#if LLVM_ENABLE_THREADS + /// Wrap the Task in a std::function<void()> that sets the result of the + /// corresponding future. + auto R = createTaskAndFuture(Task); + + { + // Lock the queue and push the new task + std::unique_lock<std::mutex> LockGuard(QueueLock); + + // Don't allow enqueueing after disabling the pool + assert(EnableFlag && "Queuing a thread during ThreadPool destruction"); + Tasks.push(std::move(R.first)); + } + QueueCondition.notify_one(); + return R.second.share(); + +#else // LLVM_ENABLE_THREADS Disabled + + // Get a Future with launch::deferred execution using std::async + auto Future = std::async(std::launch::deferred, std::move(Task)).share(); + // Wrap the future so that both ThreadPool::wait() can operate and the + // returned future can be sync'ed on. 
+ Tasks.push([Future]() { Future.get(); }); + return Future; +#endif + } /// Threads in flight std::vector<llvm::thread> Threads; /// Tasks waiting for execution in the pool. - std::queue<PackagedTaskTy> Tasks; + std::queue<std::function<void()>> Tasks; /// Locking and signaling for accessing the Tasks queue. std::mutex QueueLock; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index e2d3dbdda88a..1d189c6dea6d 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -645,6 +645,13 @@ def extract_vec_elt_combines : GICombineGroup<[ extract_vec_elt_build_vec, extract_all_elts_from_build_vector]>; +def funnel_shift_from_or_shift : GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_OR):$root, + [{ return Helper.matchOrShiftToFunnelShift(*${root}, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }]) +>; + def funnel_shift_to_rotate : GICombineRule< (defs root:$root), (match (wip_match_opcode G_FSHL, G_FSHR):$root, @@ -683,7 +690,8 @@ def bitfield_extract_from_and : GICombineRule< [{ return Helper.matchBitfieldExtractFromAnd(*${root}, ${info}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; -def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>; +def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift, + funnel_shift_to_rotate]>; def bitfield_extract_from_sext_inreg : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), @@ -751,6 +759,84 @@ def redundant_neg_operands: GICombineRule< [{ return Helper.matchRedundantNegOperands(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; +// Transform (fadd x, (fmul y, z)) -> (fma y, z, x) +// (fadd x, (fmul y, z)) -> (fmad y, z, x) +// Transform (fadd (fmul x, y), z) -> (fma x, y, z) +// (fadd (fmul x, y), z) -> (fmad x, y, z) +def 
combine_fadd_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FADD):$root, + [{ return Helper.matchCombineFAddFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) +// -> (fmad (fpext x), (fpext y), z) +// Transform (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) +// -> (fmad (fpext y), (fpext z), x) +def combine_fadd_fpext_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FADD):$root, + [{ return Helper.matchCombineFAddFpExtFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fadd (fma x, y, (fmul z, u)), v) -> (fma x, y, (fma z, u, v)) +// (fadd (fmad x, y, (fmul z, u)), v) -> (fmad x, y, (fmad z, u, v)) +// Transform (fadd v, (fma x, y, (fmul z, u))) -> (fma x, y, (fma z, u, v)) +// (fadd v, (fmad x, y, (fmul z, u))) -> (fmad x, y, (fmad z, u, v)) +def combine_fadd_fma_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FADD):$root, + [{ return Helper.matchCombineFAddFMAFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fadd (fma x, y, (fpext (fmul u, v))), z) -> +// (fma x, y, (fma (fpext u), (fpext v), z)) +def combine_fadd_fpext_fma_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FADD):$root, + [{ return Helper.matchCombineFAddFpExtFMulToFMadOrFMAAggressive( + *${root}, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fsub (fmul x, y), z) -> (fma x, y, -z) +// -> (fmad x, y, -z) +def combine_fsub_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode 
G_FSUB):$root, + [{ return Helper.matchCombineFSubFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) +// (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x) +def combine_fsub_fneg_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FSUB):$root, + [{ return Helper.matchCombineFSubFNegFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fsub (fpext (fmul x, y)), z) -> +// (fma (fpext x), (fpext y), (fneg z)) +def combine_fsub_fpext_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FSUB):$root, + [{ return Helper.matchCombineFSubFpExtFMulToFMadOrFMA(*${root}, + ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +// Transform (fsub (fneg (fpext (fmul x, y))), z) -> +// (fneg (fma (fpext x), (fpext y), z)) +def combine_fsub_fpext_fneg_fmul_to_fmad_or_fma: GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_FSUB):$root, + [{ return Helper.matchCombineFSubFpExtFNegFMulToFMadOrFMA( + *${root}, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -783,6 +869,12 @@ def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>; def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd, mul_by_neg_one]>; +def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma, + combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma, + combine_fadd_fpext_fma_fmul_to_fmad_or_fma, combine_fsub_fmul_to_fmad_or_fma, + combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma, + combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>; + def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload, combine_indexed_load_store, undef_combines, identity_combines, phi_combines, @@ -799,7 +891,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, truncstore_merge, div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract, constant_fold, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, - and_or_disjoint_mask ]>; + and_or_disjoint_mask, fma_combines]>; // A combine group used to for prelegalizer combiners at -O0. 
The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h index 6e45f8f6fb05..429fcbd81b45 100644 --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -24,22 +24,47 @@ using namespace sampleprof; namespace llvm { namespace sampleprof { +struct ProfiledCallGraphNode; + +struct ProfiledCallGraphEdge { + ProfiledCallGraphEdge(ProfiledCallGraphNode *Source, + ProfiledCallGraphNode *Target, uint64_t Weight) + : Source(Source), Target(Target), Weight(Weight) {} + ProfiledCallGraphNode *Source; + ProfiledCallGraphNode *Target; + uint64_t Weight; + + // The call destination is the only important data here, + // allow to transparently unwrap into it. + operator ProfiledCallGraphNode *() const { return Target; } +}; + struct ProfiledCallGraphNode { - ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {} - StringRef Name; - struct ProfiledCallGraphNodeComparer { - bool operator()(const ProfiledCallGraphNode *L, - const ProfiledCallGraphNode *R) const { - return L->Name < R->Name; + // Sort edges by callee names only since all edges to be compared are from + // same caller. Edge weights are not considered either because for the same + // callee only the edge with the largest weight is added to the edge set. 
+ struct ProfiledCallGraphEdgeComparer { + bool operator()(const ProfiledCallGraphEdge &L, + const ProfiledCallGraphEdge &R) const { + return L.Target->Name < R.Target->Name; } }; - std::set<ProfiledCallGraphNode *, ProfiledCallGraphNodeComparer> Callees; + + using iterator = std::set<ProfiledCallGraphEdge>::iterator; + using const_iterator = std::set<ProfiledCallGraphEdge>::const_iterator; + using edge = ProfiledCallGraphEdge; + using edges = std::set<ProfiledCallGraphEdge, ProfiledCallGraphEdgeComparer>; + + ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {} + + StringRef Name; + edges Edges; }; class ProfiledCallGraph { public: - using iterator = std::set<ProfiledCallGraphNode *>::iterator; + using iterator = std::set<ProfiledCallGraphEdge>::iterator; // Constructor for non-CS profile. ProfiledCallGraph(SampleProfileMap &ProfileMap) { @@ -63,8 +88,9 @@ public: while (!Queue.empty()) { ContextTrieNode *Caller = Queue.front(); Queue.pop(); - // Add calls for context. When AddNodeWithSamplesOnly is true, both caller - // and callee need to have context profile. + FunctionSamples *CallerSamples = Caller->getFunctionSamples(); + + // Add calls for context. // Note that callsite target samples are completely ignored since they can // conflict with the context edges, which are formed by context // compression during profile generation, for cyclic SCCs. This may @@ -74,31 +100,61 @@ public: ContextTrieNode *Callee = &Child.second; addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); + + // Fetch edge weight from the profile. 
+ uint64_t Weight; + FunctionSamples *CalleeSamples = Callee->getFunctionSamples(); + if (!CalleeSamples || !CallerSamples) { + Weight = 0; + } else { + uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples(); + uint64_t CallsiteCount = 0; + LineLocation Callsite = Callee->getCallSiteLoc(); + if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) { + SampleRecord::CallTargetMap &TargetCounts = CallTargets.get(); + auto It = TargetCounts.find(CalleeSamples->getName()); + if (It != TargetCounts.end()) + CallsiteCount = It->second; + } + Weight = std::max(CallsiteCount, CalleeEntryCount); + } + addProfiledCall(ContextTracker.getFuncNameFor(Caller), - ContextTracker.getFuncNameFor(Callee)); + ContextTracker.getFuncNameFor(Callee), Weight); } } } - iterator begin() { return Root.Callees.begin(); } - iterator end() { return Root.Callees.end(); } + iterator begin() { return Root.Edges.begin(); } + iterator end() { return Root.Edges.end(); } ProfiledCallGraphNode *getEntryNode() { return &Root; } void addProfiledFunction(StringRef Name) { if (!ProfiledFunctions.count(Name)) { // Link to synthetic root to make sure every node is reachable // from root. This does not affect SCC order. 
ProfiledFunctions[Name] = ProfiledCallGraphNode(Name); - Root.Callees.insert(&ProfiledFunctions[Name]); + Root.Edges.emplace(&Root, &ProfiledFunctions[Name], 0); } } - void addProfiledCall(StringRef CallerName, StringRef CalleeName) { +private: + void addProfiledCall(StringRef CallerName, StringRef CalleeName, + uint64_t Weight = 0) { assert(ProfiledFunctions.count(CallerName)); auto CalleeIt = ProfiledFunctions.find(CalleeName); - if (CalleeIt == ProfiledFunctions.end()) { + if (CalleeIt == ProfiledFunctions.end()) return; + ProfiledCallGraphEdge Edge(&ProfiledFunctions[CallerName], + &CalleeIt->second, Weight); + auto &Edges = ProfiledFunctions[CallerName].Edges; + auto EdgeIt = Edges.find(Edge); + if (EdgeIt == Edges.end()) { + Edges.insert(Edge); + } else if (EdgeIt->Weight < Edge.Weight) { + // Replace existing call edges with same target but smaller weight. + Edges.erase(EdgeIt); + Edges.insert(Edge); } - ProfiledFunctions[CallerName].Callees.insert(&CalleeIt->second); } void addProfiledCalls(const FunctionSamples &Samples) { @@ -107,20 +163,20 @@ public: for (const auto &Sample : Samples.getBodySamples()) { for (const auto &Target : Sample.second.getCallTargets()) { addProfiledFunction(Target.first()); - addProfiledCall(Samples.getFuncName(), Target.first()); + addProfiledCall(Samples.getFuncName(), Target.first(), Target.second); } } for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) { for (const auto &InlinedSamples : CallsiteSamples.second) { addProfiledFunction(InlinedSamples.first); - addProfiledCall(Samples.getFuncName(), InlinedSamples.first); + addProfiledCall(Samples.getFuncName(), InlinedSamples.first, + InlinedSamples.second.getEntrySamples()); addProfiledCalls(InlinedSamples.second); } } } -private: ProfiledCallGraphNode Root; StringMap<ProfiledCallGraphNode> ProfiledFunctions; }; @@ -128,12 +184,14 @@ private: } // end namespace sampleprof template <> struct GraphTraits<ProfiledCallGraphNode *> { + using NodeType = 
ProfiledCallGraphNode; using NodeRef = ProfiledCallGraphNode *; - using ChildIteratorType = std::set<ProfiledCallGraphNode *>::iterator; + using EdgeType = NodeType::edge; + using ChildIteratorType = NodeType::const_iterator; static NodeRef getEntryNode(NodeRef PCGN) { return PCGN; } - static ChildIteratorType child_begin(NodeRef N) { return N->Callees.begin(); } - static ChildIteratorType child_end(NodeRef N) { return N->Callees.end(); } + static ChildIteratorType child_begin(NodeRef N) { return N->Edges.begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->Edges.end(); } }; template <> diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index c13407a44091..6002f0270083 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -141,7 +141,7 @@ private: AsanDtorKind DestructorKind; }; -// Insert AddressSanitizer (address sanity checking) instrumentation +// Insert AddressSanitizer (address basic correctness checking) instrumentation FunctionPass *createAddressSanitizerFunctionPass( bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false, diff --git a/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h index d76b55babc74..45983ad9d571 100644 --- a/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h +++ b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h @@ -22,6 +22,7 @@ class Function; struct AnnotationRemarksPass : public PassInfoMixin<AnnotationRemarksPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 72cb606eb51a..3c529abce85a 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h 
+++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -55,7 +55,6 @@ class MDNode; class MemorySSAUpdater; class PHINode; class StoreInst; -class SwitchInst; class TargetLibraryInfo; class TargetTransformInfo; @@ -238,10 +237,6 @@ CallInst *createCallMatchingInvoke(InvokeInst *II); /// This function converts the specified invoek into a normall call. void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr); -/// This function removes the default destination from the specified switch. -void createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU = nullptr); - ///===---------------------------------------------------------------------===// /// Dbg Intrinsic utilities /// diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h index 22b2295cc9d7..c233e3dc168e 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h @@ -169,6 +169,10 @@ public: /// Called to update debug info associated with the instruction. virtual void updateDebugInfo(Instruction *I) const {} + + /// Return false if a sub-class wants to keep one of the loads/stores + /// after the SSA construction. + virtual bool shouldDelete(Instruction *I) const { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h new file mode 100644 index 000000000000..e1f681bbd367 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h @@ -0,0 +1,284 @@ +//===- Transforms/Utils/SampleProfileInference.h ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file provides the interface for the profile inference algorithm, profi. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H +#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallVector.h" + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" + +namespace llvm { + +class BasicBlock; +class Function; +class MachineBasicBlock; +class MachineFunction; + +namespace afdo_detail { + +template <class BlockT> struct TypeMap {}; +template <> struct TypeMap<BasicBlock> { + using BasicBlockT = BasicBlock; + using FunctionT = Function; +}; +template <> struct TypeMap<MachineBasicBlock> { + using BasicBlockT = MachineBasicBlock; + using FunctionT = MachineFunction; +}; + +} // end namespace afdo_detail + +struct FlowJump; + +/// A wrapper of a binary basic block. +struct FlowBlock { + uint64_t Index; + uint64_t Weight{0}; + bool UnknownWeight{false}; + uint64_t Flow{0}; + bool HasSelfEdge{false}; + std::vector<FlowJump *> SuccJumps; + std::vector<FlowJump *> PredJumps; + + /// Check if it is the entry block in the function. + bool isEntry() const { return PredJumps.empty(); } + + /// Check if it is an exit block in the function. + bool isExit() const { return SuccJumps.empty(); } +}; + +/// A wrapper of a jump between two basic blocks. +struct FlowJump { + uint64_t Source; + uint64_t Target; + uint64_t Flow{0}; + bool IsUnlikely{false}; +}; + +/// A wrapper of binary function with basic blocks and jumps. +struct FlowFunction { + std::vector<FlowBlock> Blocks; + std::vector<FlowJump> Jumps; + /// The index of the entry block. 
+ uint64_t Entry; +}; + +void applyFlowInference(FlowFunction &Func); + +/// Sample profile inference pass. +template <typename BT> class SampleProfileInference { +public: + using BasicBlockT = typename afdo_detail::TypeMap<BT>::BasicBlockT; + using FunctionT = typename afdo_detail::TypeMap<BT>::FunctionT; + using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>; + using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>; + using EdgeWeightMap = DenseMap<Edge, uint64_t>; + using BlockEdgeMap = + DenseMap<const BasicBlockT *, SmallVector<const BasicBlockT *, 8>>; + + SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors, + BlockWeightMap &SampleBlockWeights) + : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {} + + /// Apply the profile inference algorithm for a given function + void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights); + +private: + /// Try to infer branch probabilities mimicking implementation of + /// BranchProbabilityInfo. Unlikely taken branches are marked so that the + /// inference algorithm can avoid sending flow along corresponding edges. + void findUnlikelyJumps(const std::vector<const BasicBlockT *> &BasicBlocks, + BlockEdgeMap &Successors, FlowFunction &Func); + + /// Determine whether the block is an exit in the CFG. + bool isExit(const BasicBlockT *BB); + + /// Function. + const FunctionT &F; + + /// Successors for each basic block in the CFG. + BlockEdgeMap &Successors; + + /// Map basic blocks to their sampled weights. + BlockWeightMap &SampleBlockWeights; +}; + +template <typename BT> +void SampleProfileInference<BT>::apply(BlockWeightMap &BlockWeights, + EdgeWeightMap &EdgeWeights) { + // Find all forwards reachable blocks which the inference algorithm will be + // applied on. 
+ df_iterator_default_set<const BasicBlockT *> Reachable; + for (auto *BB : depth_first_ext(&F, Reachable)) + (void)BB /* Mark all reachable blocks */; + + // Find all backwards reachable blocks which the inference algorithm will be + // applied on. + df_iterator_default_set<const BasicBlockT *> InverseReachable; + for (const auto &BB : F) { + // An exit block is a block without any successors. + if (isExit(&BB)) { + for (auto *RBB : inverse_depth_first_ext(&BB, InverseReachable)) + (void)RBB; + } + } + + // Keep a stable order for reachable blocks + DenseMap<const BasicBlockT *, uint64_t> BlockIndex; + std::vector<const BasicBlockT *> BasicBlocks; + BlockIndex.reserve(Reachable.size()); + BasicBlocks.reserve(Reachable.size()); + for (const auto &BB : F) { + if (Reachable.count(&BB) && InverseReachable.count(&BB)) { + BlockIndex[&BB] = BasicBlocks.size(); + BasicBlocks.push_back(&BB); + } + } + + BlockWeights.clear(); + EdgeWeights.clear(); + bool HasSamples = false; + for (const auto *BB : BasicBlocks) { + auto It = SampleBlockWeights.find(BB); + if (It != SampleBlockWeights.end() && It->second > 0) { + HasSamples = true; + BlockWeights[BB] = It->second; + } + } + // Quit early for functions with a single block or ones w/o samples + if (BasicBlocks.size() <= 1 || !HasSamples) { + return; + } + + // Create necessary objects + FlowFunction Func; + Func.Blocks.reserve(BasicBlocks.size()); + // Create FlowBlocks + for (const auto *BB : BasicBlocks) { + FlowBlock Block; + if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) { + Block.UnknownWeight = false; + Block.Weight = SampleBlockWeights[BB]; + } else { + Block.UnknownWeight = true; + Block.Weight = 0; + } + Block.Index = Func.Blocks.size(); + Func.Blocks.push_back(Block); + } + // Create FlowEdges + for (const auto *BB : BasicBlocks) { + for (auto *Succ : Successors[BB]) { + if (!BlockIndex.count(Succ)) + continue; + FlowJump Jump; + Jump.Source = BlockIndex[BB]; + Jump.Target = BlockIndex[Succ]; + 
Func.Jumps.push_back(Jump); + if (BB == Succ) { + Func.Blocks[BlockIndex[BB]].HasSelfEdge = true; + } + } + } + for (auto &Jump : Func.Jumps) { + Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump); + Func.Blocks[Jump.Target].PredJumps.push_back(&Jump); + } + + // Try to infer probabilities of jumps based on the content of basic block + findUnlikelyJumps(BasicBlocks, Successors, Func); + + // Find the entry block + for (size_t I = 0; I < Func.Blocks.size(); I++) { + if (Func.Blocks[I].isEntry()) { + Func.Entry = I; + break; + } + } + + // Create and apply the inference network model. + applyFlowInference(Func); + + // Extract the resulting weights from the control flow + // All weights are increased by one to avoid propagation errors introduced by + // zero weights. + for (const auto *BB : BasicBlocks) { + BlockWeights[BB] = Func.Blocks[BlockIndex[BB]].Flow; + } + for (auto &Jump : Func.Jumps) { + Edge E = std::make_pair(BasicBlocks[Jump.Source], BasicBlocks[Jump.Target]); + EdgeWeights[E] = Jump.Flow; + } + +#ifndef NDEBUG + // Unreachable blocks and edges should not have a weight. 
+ for (auto &I : BlockWeights) { + assert(Reachable.contains(I.first)); + assert(InverseReachable.contains(I.first)); + } + for (auto &I : EdgeWeights) { + assert(Reachable.contains(I.first.first) && + Reachable.contains(I.first.second)); + assert(InverseReachable.contains(I.first.first) && + InverseReachable.contains(I.first.second)); + } +#endif +} + +template <typename BT> +inline void SampleProfileInference<BT>::findUnlikelyJumps( + const std::vector<const BasicBlockT *> &BasicBlocks, + BlockEdgeMap &Successors, FlowFunction &Func) {} + +template <> +inline void SampleProfileInference<BasicBlock>::findUnlikelyJumps( + const std::vector<const BasicBlockT *> &BasicBlocks, + BlockEdgeMap &Successors, FlowFunction &Func) { + for (auto &Jump : Func.Jumps) { + const auto *BB = BasicBlocks[Jump.Source]; + const auto *Succ = BasicBlocks[Jump.Target]; + const Instruction *TI = BB->getTerminator(); + // Check if a block ends with InvokeInst and mark non-taken branch unlikely. + // In that case block Succ should be a landing pad + if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) { + if (isa<InvokeInst>(TI)) { + Jump.IsUnlikely = true; + } + } + const Instruction *SuccTI = Succ->getTerminator(); + // Check if the target block contains UnreachableInst and mark it unlikely + if (SuccTI->getNumSuccessors() == 0) { + if (isa<UnreachableInst>(SuccTI)) { + Jump.IsUnlikely = true; + } + } + } +} + +template <typename BT> +inline bool SampleProfileInference<BT>::isExit(const BasicBlockT *BB) { + return BB->succ_empty(); +} + +template <> +inline bool SampleProfileInference<BasicBlock>::isExit(const BasicBlock *BB) { + return succ_empty(BB); +} + +} // end namespace llvm +#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 6a2f0acf46f3..175bdde7fd05 100644 --- 
a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -38,6 +38,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SampleProfileInference.h" #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" namespace llvm { @@ -74,6 +75,8 @@ template <> struct IRTraits<BasicBlock> { } // end namespace afdo_detail +extern cl::opt<bool> SampleProfileUseProfi; + template <typename BT> class SampleProfileLoaderBaseImpl { public: SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName) @@ -142,6 +145,9 @@ protected: ArrayRef<BasicBlockT *> Descendants, PostDominatorTreeT *DomTree); void propagateWeights(FunctionT &F); + void applyProfi(FunctionT &F, BlockEdgeMap &Successors, + BlockWeightMap &SampleBlockWeights, + BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(FunctionT &F); bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount); @@ -150,6 +156,11 @@ protected: bool computeAndPropagateWeights(FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs); + void initWeightPropagation(FunctionT &F, + const DenseSet<GlobalValue::GUID> &InlinedGUIDs); + void + finalizeWeightPropagation(FunctionT &F, + const DenseSet<GlobalValue::GUID> &InlinedGUIDs); void emitCoverageRemarks(FunctionT &F); /// Map basic blocks to their computed weights. @@ -741,50 +752,65 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) { /// known). template <typename BT> void SampleProfileLoaderBaseImpl<BT>::propagateWeights(FunctionT &F) { - bool Changed = true; - unsigned I = 0; - - // If BB weight is larger than its corresponding loop's header BB weight, - // use the BB weight to replace the loop header BB weight. 
- for (auto &BI : F) { - BasicBlockT *BB = &BI; - LoopT *L = LI->getLoopFor(BB); - if (!L) { - continue; + // Flow-based profile inference is only usable with BasicBlock instantiation + // of SampleProfileLoaderBaseImpl. + if (SampleProfileUseProfi) { + // Prepare block sample counts for inference. + BlockWeightMap SampleBlockWeights; + for (const auto &BI : F) { + ErrorOr<uint64_t> Weight = getBlockWeight(&BI); + if (Weight) + SampleBlockWeights[&BI] = Weight.get(); } - BasicBlockT *Header = L->getHeader(); - if (Header && BlockWeights[BB] > BlockWeights[Header]) { - BlockWeights[Header] = BlockWeights[BB]; + // Fill in BlockWeights and EdgeWeights using an inference algorithm. + applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights); + } else { + bool Changed = true; + unsigned I = 0; + + // If BB weight is larger than its corresponding loop's header BB weight, + // use the BB weight to replace the loop header BB weight. + for (auto &BI : F) { + BasicBlockT *BB = &BI; + LoopT *L = LI->getLoopFor(BB); + if (!L) { + continue; + } + BasicBlockT *Header = L->getHeader(); + if (Header && BlockWeights[BB] > BlockWeights[Header]) { + BlockWeights[Header] = BlockWeights[BB]; + } } - } - // Before propagation starts, build, for each block, a list of - // unique predecessors and successors. This is necessary to handle - // identical edges in multiway branches. Since we visit all blocks and all - // edges of the CFG, it is cleaner to build these lists once at the start - // of the pass. - buildEdges(F); + // Propagate until we converge or we go past the iteration limit. + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, false); + } - // Propagate until we converge or we go past the iteration limit. - while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F, false); - } + // The first propagation propagates BB counts from annotated BBs to unknown + // BBs. 
The 2nd propagation pass resets edge weights, and uses all BB + weights to propagate edge weights. + VisitedEdges.clear(); + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, false); + } - // The first propagation propagates BB counts from annotated BBs to unknown - // BBs. The 2nd propagation pass resets edges weights, and use all BB weights - // to propagate edge weights. - VisitedEdges.clear(); - Changed = true; - while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F, false); + // The 3rd propagation pass allows adjusting annotated BB weights that are + // obviously wrong. + Changed = true; + while (Changed && I++ < SampleProfileMaxPropagateIterations) { + Changed = propagateThroughEdges(F, true); + } } +} - // The 3rd propagation pass allows adjust annotated BB weights that are - // obviously wrong. - Changed = true; - while (Changed && I++ < SampleProfileMaxPropagateIterations) { - Changed = propagateThroughEdges(F, true); - } +template <typename BT> +void SampleProfileLoaderBaseImpl<BT>::applyProfi( + FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights, + BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) { + auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights); + Infer.apply(BlockWeights, EdgeWeights); } /// Generate branch weight metadata for all branches in \p F. @@ -842,26 +868,64 @@ bool SampleProfileLoaderBaseImpl<BT>::computeAndPropagateWeights( Changed |= computeBlockWeights(F); if (Changed) { - // Add an entry count to the function using the samples gathered at the - // function entry. - // Sets the GUIDs that are inlined in the profiled binary. This is used - // for ThinLink to make correct liveness analysis, and also make the IR - // match the profiled binary before annotation. 
- getFunction(F).setEntryCount( - ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), - &InlinedGUIDs); + // Initialize propagation. + initWeightPropagation(F, InlinedGUIDs); + // Propagate weights to all edges. + propagateWeights(F); + + // Post-process propagated weights. + finalizeWeightPropagation(F, InlinedGUIDs); + } + + return Changed; +} + +template <typename BT> +void SampleProfileLoaderBaseImpl<BT>::initWeightPropagation( + FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) { + // Add an entry count to the function using the samples gathered at the + // function entry. + // Sets the GUIDs that are inlined in the profiled binary. This is used + // for ThinLink to make correct liveness analysis, and also make the IR + // match the profiled binary before annotation. + getFunction(F).setEntryCount( + ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), + &InlinedGUIDs); + + if (!SampleProfileUseProfi) { // Compute dominance and loop info needed for propagation. computeDominanceAndLoopInfo(F); // Find equivalence classes. findEquivalenceClasses(F); - - // Propagate weights to all edges. - propagateWeights(F); } - return Changed; + // Before propagation starts, build, for each block, a list of + // unique predecessors and successors. This is necessary to handle + // identical edges in multiway branches. Since we visit all blocks and all + // edges of the CFG, it is cleaner to build these lists once at the start + // of the pass. + buildEdges(F); +} + +template <typename BT> +void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation( + FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) { + // If we utilize a flow-based count inference, then we trust the computed + // counts and set the entry count as computed by the algorithm. This is + // primarily done to sync the counts produced by profi and BFI inference, + // which uses the entry count for mass propagation. 
+ // If profi produces a zero-value for the entry count, we fall back to + // Samples->getHeadSamples() + 1 to avoid functions with zero count. + if (SampleProfileUseProfi) { + const BasicBlockT *EntryBB = getEntryBB(&F); + if (BlockWeights[EntryBB] > 0) { + getFunction(F).setEntryCount( + ProfileCount(BlockWeights[EntryBB], Function::PCT_Real), + &InlinedGUIDs); + } + } } template <typename BT> |