Diffstat (limited to 'include/llvm/Analysis')
57 files changed, 3419 insertions, 1795 deletions
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index e00ae4f3beec..9de075dfd681 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -38,24 +38,30 @@
 #ifndef LLVM_ANALYSIS_ALIASANALYSIS_H
 #define LLVM_ANALYSIS_ALIASANALYSIS_H

+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
-#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <vector>

 namespace llvm {

-class BasicAAResult;
-class LoadInst;
-class StoreInst;
-class VAArgInst;
-class DataLayout;
-class Pass;
+
 class AnalysisUsage;
-class MemTransferInst;
-class MemIntrinsic;
+class BasicAAResult;
+class BasicBlock;
 class DominatorTree;
 class OrderedBasicBlock;
+class Value;

 /// The possible results of an alias query.
 ///
@@ -89,19 +95,62 @@ enum AliasResult {
 ///
 /// This is no access at all, a modification, a reference, or both
 /// a modification and a reference. These are specifically structured such that
-/// they form a two bit matrix and bit-tests for 'mod' or 'ref' work with any
-/// of the possible values.
-enum ModRefInfo {
+/// they form a two bit matrix and bit-tests for 'mod' or 'ref'
+/// work with any of the possible values.
+
+enum class ModRefInfo {
   /// The access neither references nor modifies the value stored in memory.
-  MRI_NoModRef = 0,
-  /// The access references the value stored in memory.
-  MRI_Ref = 1,
-  /// The access modifies the value stored in memory.
-  MRI_Mod = 2,
-  /// The access both references and modifies the value stored in memory.
-  MRI_ModRef = MRI_Ref | MRI_Mod
+  NoModRef = 0,
+  /// The access may reference the value stored in memory.
+  Ref = 1,
+  /// The access may modify the value stored in memory.
+  Mod = 2,
+  /// The access may reference and may modify the value stored in memory.
+  ModRef = Ref | Mod,
 };

+LLVM_NODISCARD inline bool isNoModRef(const ModRefInfo MRI) {
+  return MRI == ModRefInfo::NoModRef;
+}
+LLVM_NODISCARD inline bool isModOrRefSet(const ModRefInfo MRI) {
+  return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::ModRef);
+}
+LLVM_NODISCARD inline bool isModAndRefSet(const ModRefInfo MRI) {
+  return (static_cast<int>(MRI) & static_cast<int>(ModRefInfo::ModRef)) ==
+         static_cast<int>(ModRefInfo::ModRef);
+}
+LLVM_NODISCARD inline bool isModSet(const ModRefInfo MRI) {
+  return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Mod);
+}
+LLVM_NODISCARD inline bool isRefSet(const ModRefInfo MRI) {
+  return static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Ref);
+}
+
+LLVM_NODISCARD inline ModRefInfo setMod(const ModRefInfo MRI) {
+  return ModRefInfo(static_cast<int>(MRI) | static_cast<int>(ModRefInfo::Mod));
+}
+LLVM_NODISCARD inline ModRefInfo setRef(const ModRefInfo MRI) {
+  return ModRefInfo(static_cast<int>(MRI) | static_cast<int>(ModRefInfo::Ref));
+}
+LLVM_NODISCARD inline ModRefInfo setModAndRef(const ModRefInfo MRI) {
+  return ModRefInfo(static_cast<int>(MRI) |
+                    static_cast<int>(ModRefInfo::ModRef));
+}
+LLVM_NODISCARD inline ModRefInfo clearMod(const ModRefInfo MRI) {
+  return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Ref));
+}
+LLVM_NODISCARD inline ModRefInfo clearRef(const ModRefInfo MRI) {
+  return ModRefInfo(static_cast<int>(MRI) & static_cast<int>(ModRefInfo::Mod));
+}
+LLVM_NODISCARD inline ModRefInfo unionModRef(const ModRefInfo MRI1,
+                                             const ModRefInfo MRI2) {
+  return ModRefInfo(static_cast<int>(MRI1) | static_cast<int>(MRI2));
+}
+LLVM_NODISCARD inline ModRefInfo intersectModRef(const ModRefInfo MRI1,
+                                                 const ModRefInfo MRI2) {
+  return ModRefInfo(static_cast<int>(MRI1) & static_cast<int>(MRI2));
+}
+
 /// The locations at which a function might access memory.
 ///
 /// These are primarily used in conjunction with the \c AccessKind bits to
@@ -129,27 +178,31 @@ enum FunctionModRefBehavior {
   /// This property corresponds to the GCC 'const' attribute.
   /// This property corresponds to the LLVM IR 'readnone' attribute.
   /// This property corresponds to the IntrNoMem LLVM intrinsic flag.
-  FMRB_DoesNotAccessMemory = FMRL_Nowhere | MRI_NoModRef,
+  FMRB_DoesNotAccessMemory =
+      FMRL_Nowhere | static_cast<int>(ModRefInfo::NoModRef),

   /// The only memory references in this function (if it has any) are
   /// non-volatile loads from objects pointed to by its pointer-typed
   /// arguments, with arbitrary offsets.
   ///
   /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag.
-  FMRB_OnlyReadsArgumentPointees = FMRL_ArgumentPointees | MRI_Ref,
+  FMRB_OnlyReadsArgumentPointees =
+      FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::Ref),

   /// The only memory references in this function (if it has any) are
   /// non-volatile loads and stores from objects pointed to by its
   /// pointer-typed arguments, with arbitrary offsets.
   ///
   /// This property corresponds to the IntrArgMemOnly LLVM intrinsic flag.
-  FMRB_OnlyAccessesArgumentPointees = FMRL_ArgumentPointees | MRI_ModRef,
+  FMRB_OnlyAccessesArgumentPointees =
+      FMRL_ArgumentPointees | static_cast<int>(ModRefInfo::ModRef),

   /// The only memory references in this function (if it has any) are
   /// references of memory that is otherwise inaccessible via LLVM IR.
   ///
   /// This property corresponds to the LLVM IR inaccessiblememonly attribute.
-  FMRB_OnlyAccessesInaccessibleMem = FMRL_InaccessibleMem | MRI_ModRef,
+  FMRB_OnlyAccessesInaccessibleMem =
+      FMRL_InaccessibleMem | static_cast<int>(ModRefInfo::ModRef),

   /// The function may perform non-volatile loads and stores of objects
   /// pointed to by its pointer-typed arguments, with arbitrary offsets, and
@@ -159,7 +212,8 @@ enum FunctionModRefBehavior {
   /// This property corresponds to the LLVM IR
   /// inaccessiblemem_or_argmemonly attribute.
   FMRB_OnlyAccessesInaccessibleOrArgMem = FMRL_InaccessibleMem |
-                                          FMRL_ArgumentPointees | MRI_ModRef,
+                                          FMRL_ArgumentPointees |
+                                          static_cast<int>(ModRefInfo::ModRef),

   /// This function does not perform any non-local stores or volatile loads,
   /// but may read from any memory location.
@@ -167,20 +221,30 @@ enum FunctionModRefBehavior {
   /// This property corresponds to the GCC 'pure' attribute.
   /// This property corresponds to the LLVM IR 'readonly' attribute.
   /// This property corresponds to the IntrReadMem LLVM intrinsic flag.
-  FMRB_OnlyReadsMemory = FMRL_Anywhere | MRI_Ref,
+  FMRB_OnlyReadsMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Ref),

   // This function does not read from memory anywhere, but may write to any
   // memory location.
   //
   // This property corresponds to the LLVM IR 'writeonly' attribute.
   // This property corresponds to the IntrWriteMem LLVM intrinsic flag.
-  FMRB_DoesNotReadMemory = FMRL_Anywhere | MRI_Mod,
+  FMRB_DoesNotReadMemory = FMRL_Anywhere | static_cast<int>(ModRefInfo::Mod),

   /// This indicates that the function could not be classified into one of the
   /// behaviors above.
-  FMRB_UnknownModRefBehavior = FMRL_Anywhere | MRI_ModRef
+  FMRB_UnknownModRefBehavior =
+      FMRL_Anywhere | static_cast<int>(ModRefInfo::ModRef)
 };

+// Wrapper method strips bits significant only in FunctionModRefBehavior,
+// to obtain a valid ModRefInfo. The benefit of using the wrapper is that if
+// ModRefInfo enum changes, the wrapper can be updated to & with the new enum
+// entry with all bits set to 1.
+LLVM_NODISCARD inline ModRefInfo
+createModRefInfo(const FunctionModRefBehavior FMRB) {
+  return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef));
+}
+
 class AAResults {
 public:
   // Make these results default constructable and movable. We have to spell
@@ -348,13 +412,13 @@ public:
   /// Checks if functions with the specified behavior are known to only read
   /// from non-volatile memory (or not access memory at all).
   static bool onlyReadsMemory(FunctionModRefBehavior MRB) {
-    return !(MRB & MRI_Mod);
+    return !isModSet(createModRefInfo(MRB));
   }

   /// Checks if functions with the specified behavior are known to only write
   /// memory (or not access memory at all).
   static bool doesNotReadMemory(FunctionModRefBehavior MRB) {
-    return !(MRB & MRI_Ref);
+    return !isRefSet(createModRefInfo(MRB));
   }

   /// Checks if functions with the specified behavior are known to read and
@@ -368,7 +432,8 @@ public:
   /// read or write from objects pointed to be their pointer-typed arguments
   /// (with arbitrary offsets).
   static bool doesAccessArgPointees(FunctionModRefBehavior MRB) {
-    return (MRB & MRI_ModRef) && (MRB & FMRL_ArgumentPointees);
+    return isModOrRefSet(createModRefInfo(MRB)) &&
+           (MRB & FMRL_ArgumentPointees);
   }

   /// Checks if functions with the specified behavior are known to read and
@@ -380,7 +445,7 @@ public:
   /// Checks if functions with the specified behavior are known to potentially
   /// read or write from memory that is inaccessible from LLVM IR.
   static bool doesAccessInaccessibleMem(FunctionModRefBehavior MRB) {
-    return (MRB & MRI_ModRef) && (MRB & FMRL_InaccessibleMem);
+    return isModOrRefSet(createModRefInfo(MRB)) && (MRB & FMRL_InaccessibleMem);
   }

   /// Checks if functions with the specified behavior are known to read and
@@ -500,43 +565,26 @@ public:
     return getModRefInfo(I, MemoryLocation(P, Size));
   }

-  /// Check whether or not an instruction may read or write memory (without
-  /// regard to a specific location).
+  /// Check whether or not an instruction may read or write the optionally
+  /// specified memory location.
   ///
-  /// For function calls, this delegates to the alias-analysis specific
-  /// call-site mod-ref behavior queries. Otherwise it delegates to the generic
-  /// mod ref information query without a location.
-  ModRefInfo getModRefInfo(const Instruction *I) {
-    if (auto CS = ImmutableCallSite(I)) {
-      auto MRB = getModRefBehavior(CS);
-      if ((MRB & MRI_ModRef) == MRI_ModRef)
-        return MRI_ModRef;
-      if (MRB & MRI_Ref)
-        return MRI_Ref;
-      if (MRB & MRI_Mod)
-        return MRI_Mod;
-      return MRI_NoModRef;
-    }
-
-    return getModRefInfo(I, MemoryLocation());
-  }
-
-  /// Check whether or not an instruction may read or write the specified
-  /// memory location.
-  ///
-  /// Note explicitly that getModRefInfo considers the effects of reading and
-  /// writing the memory location, and not the effect of ordering relative to
-  /// other instructions. Thus, a volatile load is considered to be Ref,
-  /// because it does not actually write memory, it just can't be reordered
-  /// relative to other volatiles (or removed). Atomic ordered loads/stores are
-  /// considered ModRef ATM because conservatively, the visible effect appears
-  /// as if memory was written, not just an ordering constraint.
   ///
   /// An instruction that doesn't read or write memory may be trivially LICM'd
   /// for example.
   ///
-  /// This primarily delegates to specific helpers above.
-  ModRefInfo getModRefInfo(const Instruction *I, const MemoryLocation &Loc) {
+  /// For function calls, this delegates to the alias-analysis specific
+  /// call-site mod-ref behavior queries. Otherwise it delegates to the specific
+  /// helpers above.
+  ModRefInfo getModRefInfo(const Instruction *I,
+                           const Optional<MemoryLocation> &OptLoc) {
+    if (OptLoc == None) {
+      if (auto CS = ImmutableCallSite(I)) {
+        return createModRefInfo(getModRefBehavior(CS));
+      }
+    }
+
+    const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
+
     switch (I->getOpcode()) {
     case Instruction::VAArg:  return getModRefInfo((const VAArgInst*)I, Loc);
     case Instruction::Load:   return getModRefInfo((const LoadInst*)I, Loc);
@@ -553,7 +601,7 @@ public:
     case Instruction::CatchRet:
       return getModRefInfo((const CatchReturnInst *)I, Loc);
     default:
-      return MRI_NoModRef;
+      return ModRefInfo::NoModRef;
     }
   }

@@ -574,7 +622,7 @@ public:
   /// \brief Return information about whether a particular call site modifies
   /// or reads the specified memory location \p MemLoc before instruction \p I
-  /// in a BasicBlock. A ordered basic block \p OBB can be used to speed up
+  /// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
   /// instruction ordering queries inside the BasicBlock containing \p I.
   ModRefInfo callCapturesBefore(const Instruction *I,
                                 const MemoryLocation &MemLoc, DominatorTree *DT,
@@ -620,6 +668,7 @@ public:
 private:
   class Concept;
+
   template <typename T> class Model;

   template <typename T> friend class AAResultBase;
@@ -633,7 +682,7 @@ private:

 /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
 /// pointer or reference.
-typedef AAResults AliasAnalysis;
+using AliasAnalysis = AAResults;

 /// A private abstract base class describing the concept of an individual alias
 /// analysis implementation.
@@ -714,7 +763,7 @@ public:
     explicit Model(AAResultT &Result, AAResults &AAR) : Result(Result) {
       Result.setAAResults(&AAR);
     }
-    ~Model() override {}
+    ~Model() override = default;

     void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); }
@@ -824,7 +873,7 @@ protected:
     }
   };

-  explicit AAResultBase() {}
+  explicit AAResultBase() = default;

   // Provide all the copy and move constructors so that derived types aren't
   // constrained.
@@ -853,7 +902,7 @@ public:
   }

   ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
-    return MRI_ModRef;
+    return ModRefInfo::ModRef;
   }

   FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
@@ -865,15 +914,14 @@ public:
   }

   ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) {
-    return MRI_ModRef;
+    return ModRefInfo::ModRef;
   }

   ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
-    return MRI_ModRef;
+    return ModRefInfo::ModRef;
   }
 };

-
 /// Return true if this pointer is returned by a noalias function.
 bool isNoAliasCall(const Value *V);

@@ -910,7 +958,7 @@ bool isIdentifiedFunctionLocal(const Value *V);
 /// ensure the analysis itself is registered with its AnalysisManager.
 class AAManager : public AnalysisInfoMixin<AAManager> {
 public:
-  typedef AAResults Result;
+  using Result = AAResults;

   /// Register a specific AA result.
   template <typename AnalysisT> void registerFunctionAnalysis() {
@@ -931,6 +979,7 @@ public:

 private:
   friend AnalysisInfoMixin<AAManager>;
+
   static AnalysisKey Key;

   SmallVector<void (*)(Function &F, FunctionAnalysisManager &AM,
@@ -1001,6 +1050,6 @@ AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR);
 /// sure the analyses required by \p createLegacyPMAAResults are available.
 void getAAResultsAnalysisUsage(AnalysisUsage &AU);

-} // End llvm namespace
+} // end namespace llvm

-#endif
+#endif // LLVM_ANALYSIS_ALIASANALYSIS_H
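With ModRefInfo now an enum class, callers can no longer bit-test the values directly; queries and updates go through the new LLVM_NODISCARD helpers above. A minimal sketch of how they compose (standalone illustration using only helpers declared in this header; the two function names are hypothetical):

    #include "llvm/Analysis/AliasAnalysis.h"
    using namespace llvm;

    // Merge two query results: unionModRef or's the Mod/Ref bits together.
    ModRefInfo mergeResults(ModRefInfo A, ModRefInfo B) {
      return unionModRef(A, B);
    }

    // Classify a function's behavior: createModRefInfo strips the FMRL_*
    // location bits so only the Mod/Ref bits remain for testing.
    bool writesButNeverReads(FunctionModRefBehavior MRB) {
      ModRefInfo MRI = createModRefInfo(MRB);
      return isModSet(MRI) && !isRefSet(MRI);
    }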
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index daafd2fabe78..7da3ebabb8a3 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -18,36 +18,46 @@
 #define LLVM_ANALYSIS_ALIASSETTRACKER_H

 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
 #include <vector>

 namespace llvm {

+class AliasSetTracker;
+class BasicBlock;
 class LoadInst;
+class MemSetInst;
+class MemTransferInst;
+class raw_ostream;
 class StoreInst;
 class VAArgInst;
-class MemSetInst;
-class AliasSetTracker;
-class AliasSet;
+class Value;

 class AliasSet : public ilist_node<AliasSet> {
   friend class AliasSetTracker;

   class PointerRec {
     Value *Val;  // The pointer this record corresponds to.
-    PointerRec **PrevInList, *NextInList;
-    AliasSet *AS;
-    uint64_t Size;
+    PointerRec **PrevInList = nullptr;
+    PointerRec *NextInList = nullptr;
+    AliasSet *AS = nullptr;
+    uint64_t Size = 0;
     AAMDNodes AAInfo;

   public:
     PointerRec(Value *V)
-      : Val(V), PrevInList(nullptr), NextInList(nullptr), AS(nullptr), Size(0),
-        AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {}
+        : Val(V), AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {}

     Value *getValue() const { return Val; }
@@ -121,9 +131,10 @@ class AliasSet : public ilist_node<AliasSet> {
   };

   // Doubly linked list of nodes.
-  PointerRec *PtrList, **PtrListEnd;
+  PointerRec *PtrList = nullptr;
+  PointerRec **PtrListEnd;
+
   // Forwarding pointer.
-  AliasSet *Forward;
+  AliasSet *Forward = nullptr;

   /// All instructions without a specific address in this alias set.
   /// In rare cases this vector can have a null'ed out WeakVH
@@ -167,7 +178,7 @@ class AliasSet : public ilist_node<AliasSet> {
   /// True if this alias set contains volatile loads or stores.
   unsigned Volatile : 1;

-  unsigned SetSize;
+  unsigned SetSize = 0;

   void addRef() { ++RefCount; }
@@ -183,6 +194,9 @@ class AliasSet : public ilist_node<AliasSet> {
   }

 public:
+  AliasSet(const AliasSet &) = delete;
+  AliasSet &operator=(const AliasSet &) = delete;
+
   /// Accessors...
   bool isRef() const { return Access & RefAccess; }
   bool isMod() const { return Access & ModAccess; }
@@ -249,12 +263,8 @@ public:
 private:
   // Can only be created by AliasSetTracker.
   AliasSet()
-      : PtrList(nullptr), PtrListEnd(&PtrList), Forward(nullptr), RefCount(0),
-        AliasAny(false), Access(NoAccess), Alias(SetMustAlias),
-        Volatile(false), SetSize(0) {}
-
-  AliasSet(const AliasSet &AS) = delete;
-  void operator=(const AliasSet &AS) = delete;
+      : PtrListEnd(&PtrList), RefCount(0), AliasAny(false), Access(NoAccess),
+        Alias(SetMustAlias), Volatile(false) {}

   PointerRec *getSomePointer() const {
     return PtrList;
@@ -281,6 +291,7 @@ private:
                 const AAMDNodes &AAInfo, bool KnownMustAlias = false);
   void addUnknownInst(Instruction *I, AliasAnalysis &AA);
+
   void removeUnknownInst(AliasSetTracker &AST, Instruction *I) {
     bool WasEmpty = UnknownInsts.empty();
     for (size_t i = 0, e = UnknownInsts.size(); i != e; ++i)
@@ -292,6 +303,7 @@ private:
     if (!WasEmpty && UnknownInsts.empty())
       dropRef(AST);
   }
+
   void setVolatile() { Volatile = true; }

 public:
@@ -312,11 +324,13 @@ class AliasSetTracker {
   /// Value is deleted.
   class ASTCallbackVH final : public CallbackVH {
     AliasSetTracker *AST;
+
     void deleted() override;
     void allUsesReplacedWith(Value *) override;

   public:
     ASTCallbackVH(Value *V, AliasSetTracker *AST = nullptr);
+
     ASTCallbackVH &operator=(Value *V);
   };

   /// Traits to tell DenseMap that tell us how to compare and hash the value
@@ -326,9 +340,8 @@ class AliasSetTracker {
   AliasAnalysis &AA;
   ilist<AliasSet> AliasSets;

-  typedef DenseMap<ASTCallbackVH, AliasSet::PointerRec*,
-                   ASTCallbackVHDenseMapInfo>
-    PointerMapType;
+  using PointerMapType = DenseMap<ASTCallbackVH, AliasSet::PointerRec *,
+                                  ASTCallbackVHDenseMapInfo>;

   // Map from pointers to their node
   PointerMapType PointerMap;

@@ -336,8 +349,7 @@ public:
   /// Create an empty collection of AliasSets, and use the specified alias
   /// analysis object to disambiguate load and store addresses.
-  explicit AliasSetTracker(AliasAnalysis &aa)
-      : AA(aa), TotalMayAliasSetSize(0), AliasAnyAS(nullptr) {}
+  explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {}
   ~AliasSetTracker() { clear(); }

   /// These methods are used to add different types of instructions to the alias
@@ -401,8 +413,8 @@ public:
   /// tracker already knows about a value, it will ignore the request.
   void copyValue(Value *From, Value *To);

-  typedef ilist<AliasSet>::iterator iterator;
-  typedef ilist<AliasSet>::const_iterator const_iterator;
+  using iterator = ilist<AliasSet>::iterator;
+  using const_iterator = ilist<AliasSet>::const_iterator;

   const_iterator begin() const { return AliasSets.begin(); }
   const_iterator end() const { return AliasSets.end(); }
@@ -417,11 +429,11 @@ private:
   friend class AliasSet;

   // The total number of pointers contained in all "may" alias sets.
-  unsigned TotalMayAliasSetSize;
+  unsigned TotalMayAliasSetSize = 0;

   // A non-null value signifies this AST is saturated. A saturated AST lumps
   // all pointers into a single "May" set.
-  AliasSet *AliasAnyAS;
+  AliasSet *AliasAnyAS = nullptr;

   void removeAliasSet(AliasSet *AS);
@@ -451,6 +463,6 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) {
   return OS;
 }

-} // End llvm namespace
+} // end namespace llvm

-#endif
+#endif // LLVM_ANALYSIS_ALIASSETTRACKER_H
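The AliasSetTracker changes are mostly a move to C++11 in-class member initializers, so constructors only list members whose values differ from the defaults. A hedged usage sketch of the iterator aliases introduced above (the counting helper is hypothetical; isMayAlias() is an existing AliasSet accessor not shown in this hunk):

    #include "llvm/Analysis/AliasSetTracker.h"
    using namespace llvm;

    // Count the "may" alias sets a tracker currently holds, using the
    // 'using'-declared const_iterator instead of the old typedef.
    unsigned countMayAliasSets(const AliasSetTracker &AST) {
      unsigned N = 0;
      for (AliasSetTracker::const_iterator I = AST.begin(), E = AST.end();
           I != E; ++I)
        if (I->isMayAlias())
          ++N;
      return N;
    }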
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index 58d72afdc1b6..c965e62a0216 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -1,4 +1,4 @@
-//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume ---*- C++ -*-===//
+//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,9 +18,8 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
@@ -28,6 +27,11 @@

 namespace llvm {

+class CallInst;
+class Function;
+class raw_ostream;
+class Value;
+
 /// \brief A cache of @llvm.assume calls within a function.
 ///
 /// This cache provides fast lookup of assumptions within a function by caching
@@ -47,6 +51,7 @@ class AssumptionCache {
   class AffectedValueCallbackVH final : public CallbackVH {
     AssumptionCache *AC;
+
     void deleted() override;
     void allUsesReplacedWith(Value *) override;

@@ -76,7 +81,7 @@ class AssumptionCache {
   ///
   /// We want to be as lazy about this as possible, and so we scan the function
   /// at the last moment.
-  bool Scanned;
+  bool Scanned = false;

   /// \brief Scan the function for assumptions and add them to the cache.
   void scanFunction();
@@ -84,7 +89,7 @@ public:
   /// \brief Construct an AssumptionCache from a function by scanning all of
   /// its instructions.
-  AssumptionCache(Function &F) : F(F), Scanned(false) {}
+  AssumptionCache(Function &F) : F(F) {}

   /// This cache is designed to be self-updating and so it should never be
   /// invalidated.
@@ -145,10 +150,11 @@ public:
 /// assumption caches for a given function.
 class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> {
   friend AnalysisInfoMixin<AssumptionAnalysis>;
+
   static AnalysisKey Key;

 public:
-  typedef AssumptionCache Result;
+  using Result = AssumptionCache;

   AssumptionCache run(Function &F, FunctionAnalysisManager &) {
     return AssumptionCache(F);
@@ -161,6 +167,7 @@ class AssumptionPrinterPass : public PassInfoMixin<AssumptionPrinterPass> {

 public:
   explicit AssumptionPrinterPass(raw_ostream &OS) : OS(OS) {}
+
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };

@@ -177,10 +184,11 @@ class AssumptionCacheTracker : public ImmutablePass {
   /// delete our cache of intrinsics for a function when it is deleted.
   class FunctionCallbackVH final : public CallbackVH {
     AssumptionCacheTracker *ACT;
+
     void deleted() override;

   public:
-    typedef DenseMapInfo<Value *> DMI;
+    using DMI = DenseMapInfo<Value *>;

     FunctionCallbackVH(Value *V, AssumptionCacheTracker *ACT = nullptr)
         : CallbackVH(V), ACT(ACT) {}
@@ -188,8 +196,10 @@ class AssumptionCacheTracker : public ImmutablePass {

   friend FunctionCallbackVH;

-  typedef DenseMap<FunctionCallbackVH, std::unique_ptr<AssumptionCache>,
-                   FunctionCallbackVH::DMI> FunctionCallsMap;
+  using FunctionCallsMap =
+      DenseMap<FunctionCallbackVH, std::unique_ptr<AssumptionCache>,
+               FunctionCallbackVH::DMI>;
+
   FunctionCallsMap AssumptionCaches;

 public:
@@ -208,6 +218,7 @@ public:
   }

   void verifyAnalysis() const override;
+
   bool doFinalization(Module &) override {
     verifyAnalysis();
     return false;
@@ -218,4 +229,4 @@

 } // end namespace llvm

-#endif
+#endif // LLVM_ANALYSIS_ASSUMPTIONCACHE_H
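With AssumptionAnalysis exposing `using Result = AssumptionCache`, new-pass-manager code obtains the cache exactly as run() constructs it above. A sketch of a consumer (the pass itself is illustrative, and AC.assumptions() is the cache's existing accessor, not shown in this hunk):

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    struct CountAssumesPass : PassInfoMixin<CountAssumesPass> {
      PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
        AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
        unsigned NumAssumes = 0;
        for (auto &VH : AC.assumptions())
          if (VH) // cached entries may be null'ed-out value handles
            ++NumAssumes;
        (void)NumAssumes;
        return PreservedAnalyses::all();
      }
    };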
diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h
index 14e4bded264a..42e5e9714071 100644
--- a/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -14,22 +14,36 @@
 #ifndef LLVM_ANALYSIS_BASICALIASANALYSIS_H
 #define LLVM_ANALYSIS_BASICALIASANALYSIS_H

+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/PassManager.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <utility>

 namespace llvm {

+struct AAMDNodes;
+class APInt;
 class AssumptionCache;
+class BasicBlock;
+class DataLayout;
 class DominatorTree;
+class Function;
+class GEPOperator;
 class LoopInfo;
+class PHINode;
+class SelectInst;
+class TargetLibraryInfo;
+class Value;

 /// This is the AA result object for the basic, local, and stateless alias
 /// analysis. It implements the AA query interface in an entirely stateless
@@ -86,7 +100,6 @@ private:
   // A linear transformation of a Value; this class represents ZExt(SExt(V,
   // SExtBits), ZExtBits) * Scale + Offset.
   struct VariableGEPIndex {
-
     // An opaque Value - we can't decompose this further.
     const Value *V;
@@ -124,8 +137,8 @@ private:
   };

   /// Track alias queries to guard against recursion.
-  typedef std::pair<MemoryLocation, MemoryLocation> LocPair;
-  typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
+  using LocPair = std::pair<MemoryLocation, MemoryLocation>;
+  using AliasCacheTy = SmallDenseMap<LocPair, AliasResult, 8>;
   AliasCacheTy AliasCache;

   /// Tracks phi nodes we have visited.
@@ -201,10 +214,11 @@ private:
 /// Analysis pass providing a never-invalidated alias analysis result.
 class BasicAA : public AnalysisInfoMixin<BasicAA> {
   friend AnalysisInfoMixin<BasicAA>;
+
   static AnalysisKey Key;

 public:
-  typedef BasicAAResult Result;
+  using Result = BasicAAResult;

   BasicAAResult run(Function &F, FunctionAnalysisManager &AM);
 };
@@ -251,6 +265,6 @@ public:
   }
 };

-}
+} // end namespace llvm

-#endif
+#endif // LLVM_ANALYSIS_BASICALIASANALYSIS_H
diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h
index cbae01c9102f..89370cbeeea1 100644
--- a/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -18,31 +18,36 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/BlockFrequency.h"
-#include <climits>
+#include <cstdint>
+#include <memory>

 namespace llvm {

+class BasicBlock;
 class BranchProbabilityInfo;
+class Function;
 class LoopInfo;
+class Module;
+class raw_ostream;
 template <class BlockT> class BlockFrequencyInfoImpl;

+enum PGOViewCountsType { PGOVCT_None, PGOVCT_Graph, PGOVCT_Text };
+
 /// BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to
 /// estimate IR basic block frequencies.
 class BlockFrequencyInfo {
-  typedef BlockFrequencyInfoImpl<BasicBlock> ImplType;
-  std::unique_ptr<ImplType> BFI;
+  using ImplType = BlockFrequencyInfoImpl<BasicBlock>;

-  void operator=(const BlockFrequencyInfo &) = delete;
-  BlockFrequencyInfo(const BlockFrequencyInfo &) = delete;
+  std::unique_ptr<ImplType> BFI;

 public:
   BlockFrequencyInfo();
   BlockFrequencyInfo(const Function &F, const BranchProbabilityInfo &BPI,
                      const LoopInfo &LI);
+  BlockFrequencyInfo(const BlockFrequencyInfo &) = delete;
+  BlockFrequencyInfo &operator=(const BlockFrequencyInfo &) = delete;
   BlockFrequencyInfo(BlockFrequencyInfo &&Arg);
-
   BlockFrequencyInfo &operator=(BlockFrequencyInfo &&RHS);
-
   ~BlockFrequencyInfo();

   /// Handle invalidation explicitly.
@@ -70,6 +75,10 @@ public:
   /// the enclosing function's count (if available) and returns the value.
   Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const;

+  /// \brief Returns true if \p BB is an irreducible loop header
+  /// block. Otherwise false.
+  bool isIrrLoopHeader(const BasicBlock *BB);
+
   // Set the frequency of the given basic block.
   void setBlockFreq(const BasicBlock *BB, uint64_t Freq);

@@ -100,11 +109,12 @@ public:
 class BlockFrequencyAnalysis
     : public AnalysisInfoMixin<BlockFrequencyAnalysis> {
   friend AnalysisInfoMixin<BlockFrequencyAnalysis>;
+
   static AnalysisKey Key;

 public:
-  /// \brief Provide the result typedef for this analysis pass.
-  typedef BlockFrequencyInfo Result;
+  /// \brief Provide the result type for this analysis pass.
+  using Result = BlockFrequencyInfo;

   /// \brief Run the analysis pass over a function and produce BFI.
   Result run(Function &F, FunctionAnalysisManager &AM);
@@ -117,6 +127,7 @@ class BlockFrequencyPrinterPass
 public:
   explicit BlockFrequencyPrinterPass(raw_ostream &OS) : OS(OS) {}
+
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };

@@ -140,6 +151,6 @@ public:
   void print(raw_ostream &OS, const Module *M) const override;
 };

-}
+} // end namespace llvm

-#endif
+#endif // LLVM_ANALYSIS_BLOCKFREQUENCYINFO_H
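Beyond the style cleanup, BlockFrequencyInfo gains the isIrrLoopHeader() query. A sketch of using it alongside the existing frequency APIs (the reporting function is illustrative; getBlockFreq and getBlockProfileCount are existing members not shown in this hunk):

    #include "llvm/Analysis/BlockFrequencyInfo.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void describeBlock(BlockFrequencyInfo &BFI, const BasicBlock *BB,
                       raw_ostream &OS) {
      OS << "freq = " << BFI.getBlockFreq(BB).getFrequency();
      if (Optional<uint64_t> Count = BFI.getBlockProfileCount(BB))
        OS << ", count = " << Count.getValue(); // real profile data, if any
      if (BFI.isIrrLoopHeader(BB))
        OS << " (irreducible loop header)";
      OS << "\n";
    }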
diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 5de3821242e0..40c40b80bc89 100644
--- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -1,4 +1,4 @@
-//==- BlockFrequencyInfoImpl.h - Block Frequency Implementation -*- C++ -*-===//
+//==- BlockFrequencyInfoImpl.h - Block Frequency Implementation --*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,28 +16,39 @@
 #define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H

 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/Support/BlockFrequency.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/ScaledNumber.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
 #include <deque>
+#include <iterator>
+#include <limits>
 #include <list>
 #include <string>
+#include <utility>
 #include <vector>

 #define DEBUG_TYPE "block-freq"

 namespace llvm {

-class BasicBlock;
 class BranchProbabilityInfo;
 class Function;
 class Loop;
@@ -58,7 +69,8 @@ template <class BT> struct BlockEdgesAdder;
 /// \brief Mass of a block.
 ///
 /// This class implements a sort of fixed-point fraction always between 0.0 and
-/// 1.0. getMass() == UINT64_MAX indicates a value of 1.0.
+/// 1.0. getMass() == std::numeric_limits<uint64_t>::max() indicates a value of
+/// 1.0.
 ///
 /// Masses can be added and subtracted. Simple saturation arithmetic is used,
 /// so arithmetic operations never overflow or underflow.
@@ -69,18 +81,21 @@ template <class BT> struct BlockEdgesAdder;
 ///
 /// Masses can be scaled by \a BranchProbability at maximum precision.
 class BlockMass {
-  uint64_t Mass;
+  uint64_t Mass = 0;

 public:
-  BlockMass() : Mass(0) {}
+  BlockMass() = default;
   explicit BlockMass(uint64_t Mass) : Mass(Mass) {}

   static BlockMass getEmpty() { return BlockMass(); }
-  static BlockMass getFull() { return BlockMass(UINT64_MAX); }
+
+  static BlockMass getFull() {
+    return BlockMass(std::numeric_limits<uint64_t>::max());
+  }

   uint64_t getMass() const { return Mass; }

-  bool isFull() const { return Mass == UINT64_MAX; }
+  bool isFull() const { return Mass == std::numeric_limits<uint64_t>::max(); }
   bool isEmpty() const { return !Mass; }

   bool operator!() const { return isEmpty(); }
@@ -90,7 +105,7 @@ public:
   /// Adds another mass, saturating at \a isFull() rather than overflowing.
   BlockMass &operator+=(BlockMass X) {
     uint64_t Sum = Mass + X.Mass;
-    Mass = Sum < Mass ? UINT64_MAX : Sum;
+    Mass = Sum < Mass ? std::numeric_limits<uint64_t>::max() : Sum;
     return *this;
   }
@@ -159,8 +174,8 @@ template <> struct isPodLike<bfi_detail::BlockMass> {
 /// BlockFrequencyInfoImpl. See there for details.
 class BlockFrequencyInfoImplBase {
 public:
-  typedef ScaledNumber<uint64_t> Scaled64;
-  typedef bfi_detail::BlockMass BlockMass;
+  using Scaled64 = ScaledNumber<uint64_t>;
+  using BlockMass = bfi_detail::BlockMass;

   /// \brief Representative of a block.
   ///
@@ -170,8 +185,12 @@ public:
   /// Unlike a block pointer, its order has meaning (location in the
   /// topological sort) and it's class is the same regardless of block type.
   struct BlockNode {
-    typedef uint32_t IndexType;
-    IndexType Index;
+    using IndexType = uint32_t;
+
+    IndexType Index = std::numeric_limits<uint32_t>::max();
+
+    BlockNode() = default;
+    BlockNode(IndexType Index) : Index(Index) {}

     bool operator==(const BlockNode &X) const { return Index == X.Index; }
     bool operator!=(const BlockNode &X) const { return Index != X.Index; }
@@ -180,11 +199,11 @@ public:
     bool operator<(const BlockNode &X) const { return Index < X.Index; }
     bool operator>(const BlockNode &X) const { return Index > X.Index; }

-    BlockNode() : Index(UINT32_MAX) {}
-    BlockNode(IndexType Index) : Index(Index) {}
-
     bool isValid() const { return Index <= getMaxIndex(); }
-    static size_t getMaxIndex() { return UINT32_MAX - 1; }
+
+    static size_t getMaxIndex() {
+      return std::numeric_limits<uint32_t>::max() - 1;
+    }
   };

   /// \brief Stats about a block itself.
@@ -198,12 +217,13 @@ public:
   /// Contains the data necessary to represent a loop as a pseudo-node once it's
   /// packaged.
   struct LoopData {
-    typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
-    typedef SmallVector<BlockNode, 4> NodeList;
-    typedef SmallVector<BlockMass, 1> HeaderMassList;
+    using ExitMap = SmallVector<std::pair<BlockNode, BlockMass>, 4>;
+    using NodeList = SmallVector<BlockNode, 4>;
+    using HeaderMassList = SmallVector<BlockMass, 1>;
+
     LoopData *Parent;            ///< The parent loop.
-    bool IsPackaged;             ///< Whether this has been packaged.
-    uint32_t NumHeaders;         ///< Number of headers.
+    bool IsPackaged = false;     ///< Whether this has been packaged.
+    uint32_t NumHeaders = 1;     ///< Number of headers.
     ExitMap Exits;               ///< Successor edges (and weights).
     NodeList Nodes;              ///< Header and the members of the loop.
     HeaderMassList BackedgeMass; ///< Mass returned to each loop header.
@@ -211,22 +231,24 @@ public:
     Scaled64 Scale;

     LoopData(LoopData *Parent, const BlockNode &Header)
-        : Parent(Parent), IsPackaged(false), NumHeaders(1), Nodes(1, Header),
-          BackedgeMass(1) {}
+        : Parent(Parent), Nodes(1, Header), BackedgeMass(1) {}
+
     template <class It1, class It2>
     LoopData(LoopData *Parent, It1 FirstHeader, It1 LastHeader, It2 FirstOther,
              It2 LastOther)
-        : Parent(Parent), IsPackaged(false), Nodes(FirstHeader, LastHeader) {
+        : Parent(Parent), Nodes(FirstHeader, LastHeader) {
       NumHeaders = Nodes.size();
       Nodes.insert(Nodes.end(), FirstOther, LastOther);
       BackedgeMass.resize(NumHeaders);
     }
+
     bool isHeader(const BlockNode &Node) const {
       if (isIrreducible())
         return std::binary_search(Nodes.begin(), Nodes.begin() + NumHeaders,
                                   Node);
       return Node == Nodes[0];
     }
+
     BlockNode getHeader() const { return Nodes[0]; }
     bool isIrreducible() const { return NumHeaders > 1; }

@@ -241,6 +263,7 @@ public:
     NodeList::const_iterator members_begin() const {
       return Nodes.begin() + NumHeaders;
     }
+
     NodeList::const_iterator members_end() const { return Nodes.end(); }
     iterator_range<NodeList::const_iterator> members() const {
       return make_range(members_begin(), members_end());
@@ -249,13 +272,14 @@ public:

   /// \brief Index of loop information.
   struct WorkingData {
-    BlockNode Node; ///< This node.
-    LoopData *Loop; ///< The loop this block is inside.
-    BlockMass Mass; ///< Mass distribution from the entry block.
+    BlockNode Node;           ///< This node.
+    LoopData *Loop = nullptr; ///< The loop this block is inside.
+    BlockMass Mass;           ///< Mass distribution from the entry block.

-    WorkingData(const BlockNode &Node) : Node(Node), Loop(nullptr) {}
+    WorkingData(const BlockNode &Node) : Node(Node) {}

     bool isLoopHeader() const { return Loop && Loop->isHeader(Node); }
+
     bool isDoubleLoopHeader() const {
       return isLoopHeader() && Loop->Parent && Loop->Parent->isIrreducible() &&
              Loop->Parent->isHeader(Node);
@@ -286,6 +310,7 @@ public:
       auto L = getPackagedLoop();
       return L ? L->getHeader() : Node;
     }
+
     LoopData *getPackagedLoop() const {
       if (!Loop || !Loop->IsPackaged)
         return nullptr;
@@ -310,8 +335,10 @@ public:

     /// \brief Has ContainingLoop been packaged up?
     bool isPackaged() const { return getResolvedNode() != Node; }
+
     /// \brief Has Loop been packaged up?
     bool isAPackage() const { return isLoopHeader() && Loop->IsPackaged; }
+
     /// \brief Has Loop been packaged up twice?
     bool isADoublePackage() const {
       return isDoubleLoopHeader() && Loop->Parent->IsPackaged;
@@ -333,10 +360,11 @@ public:
   /// backedge to the loop header?
   struct Weight {
     enum DistType { Local, Exit, Backedge };
-    DistType Type;
+    DistType Type = Local;
     BlockNode TargetNode;
-    uint64_t Amount;
-    Weight() : Type(Local), Amount(0) {}
+    uint64_t Amount = 0;
+
+    Weight() = default;
     Weight(DistType Type, BlockNode TargetNode, uint64_t Amount)
         : Type(Type), TargetNode(TargetNode), Amount(Amount) {}
   };
@@ -350,18 +378,22 @@ public:
   /// \a DidOverflow indicates whether \a Total did overflow while adding to
   /// the distribution. It should never overflow twice.
   struct Distribution {
-    typedef SmallVector<Weight, 4> WeightList;
-    WeightList Weights;    ///< Individual successor weights.
-    uint64_t Total;        ///< Sum of all weights.
-    bool DidOverflow;      ///< Whether \a Total did overflow.
+    using WeightList = SmallVector<Weight, 4>;
+
+    WeightList Weights;       ///< Individual successor weights.
+    uint64_t Total = 0;       ///< Sum of all weights.
+    bool DidOverflow = false; ///< Whether \a Total did overflow.
+
+    Distribution() = default;

-    Distribution() : Total(0), DidOverflow(false) {}
     void addLocal(const BlockNode &Node, uint64_t Amount) {
       add(Node, Amount, Weight::Local);
     }
+
     void addExit(const BlockNode &Node, uint64_t Amount) {
       add(Node, Amount, Weight::Exit);
     }
+
     void addBackedge(const BlockNode &Node, uint64_t Amount) {
       add(Node, Amount, Weight::Backedge);
     }
@@ -384,12 +416,22 @@ public:
   /// \brief Data about each block. This is used downstream.
   std::vector<FrequencyData> Freqs;

+  /// \brief Whether each block is an irreducible loop header.
+  /// This is used downstream.
+  SparseBitVector<> IsIrrLoopHeader;
+
   /// \brief Loop data: see initializeLoops().
   std::vector<WorkingData> Working;

   /// \brief Indexed information about loops.
   std::list<LoopData> Loops;

+  /// \brief Virtual destructor.
+  ///
+  /// Need a virtual destructor to mask the compiler warning about
+  /// getBlockName().
+  virtual ~BlockFrequencyInfoImplBase() = default;
+
   /// \brief Add all edges out of a packaged loop to the distribution.
   ///
   /// Adds all edges from LocalLoopHead to Dist. Calls addToDist() to add each
@@ -456,6 +498,8 @@ public:
   /// the backedges going into each of the loop headers.
   void adjustLoopHeaderMass(LoopData &Loop);

+  void distributeIrrLoopHeaderMass(Distribution &Dist);
+
   /// \brief Package up a loop.
   void packageLoop(LoopData &Loop);

@@ -484,6 +528,7 @@ public:
                                              const BlockNode &Node) const;
   Optional<uint64_t> getProfileCountFromFreq(const Function &F,
                                              uint64_t Freq) const;
+  bool isIrrLoopHeader(const BlockNode &Node);

   void setBlockFreq(const BlockNode &Node, uint64_t Freq);

@@ -495,28 +540,24 @@ public:
     assert(!Freqs.empty());
     return Freqs[0].Integer;
   }
-
-  /// \brief Virtual destructor.
-  ///
-  /// Need a virtual destructor to mask the compiler warning about
-  /// getBlockName().
-  virtual ~BlockFrequencyInfoImplBase() {}
 };

 namespace bfi_detail {
+
 template <class BlockT> struct TypeMap {};
 template <> struct TypeMap<BasicBlock> {
-  typedef BasicBlock BlockT;
-  typedef Function FunctionT;
-  typedef BranchProbabilityInfo BranchProbabilityInfoT;
-  typedef Loop LoopT;
-  typedef LoopInfo LoopInfoT;
+  using BlockT = BasicBlock;
+  using FunctionT = Function;
+  using BranchProbabilityInfoT = BranchProbabilityInfo;
+  using LoopT = Loop;
+  using LoopInfoT = LoopInfo;
 };
 template <> struct TypeMap<MachineBasicBlock> {
-  typedef MachineBasicBlock BlockT;
-  typedef MachineFunction FunctionT;
-  typedef MachineBranchProbabilityInfo BranchProbabilityInfoT;
-  typedef MachineLoop LoopT;
-  typedef MachineLoopInfo LoopInfoT;
+  using BlockT = MachineBasicBlock;
+  using FunctionT = MachineFunction;
+  using BranchProbabilityInfoT = MachineBranchProbabilityInfo;
+  using LoopT = MachineLoop;
+  using LoopInfoT = MachineLoopInfo;
 };

 /// \brief Get the name of a MachineBasicBlock.
@@ -554,25 +595,27 @@ template <> inline std::string getBlockName(const BasicBlock *BB) {
 /// and it explicitly lists predecessors and successors. The initialization
 /// that relies on \c MachineBasicBlock is defined in the header.
 struct IrreducibleGraph {
-  typedef BlockFrequencyInfoImplBase BFIBase;
+  using BFIBase = BlockFrequencyInfoImplBase;

   BFIBase &BFI;

-  typedef BFIBase::BlockNode BlockNode;
+  using BlockNode = BFIBase::BlockNode;
+
   struct IrrNode {
     BlockNode Node;
-    unsigned NumIn;
+    unsigned NumIn = 0;
     std::deque<const IrrNode *> Edges;
-    IrrNode(const BlockNode &Node) : Node(Node), NumIn(0) {}

-    typedef std::deque<const IrrNode *>::const_iterator iterator;
+    IrrNode(const BlockNode &Node) : Node(Node) {}
+
+    using iterator = std::deque<const IrrNode *>::const_iterator;
+
     iterator pred_begin() const { return Edges.begin(); }
     iterator succ_begin() const { return Edges.begin() + NumIn; }
     iterator pred_end() const { return succ_begin(); }
     iterator succ_end() const { return Edges.end(); }
   };
   BlockNode Start;
-  const IrrNode *StartIrr;
+  const IrrNode *StartIrr = nullptr;
   std::vector<IrrNode> Nodes;
   SmallDenseMap<uint32_t, IrrNode *, 4> Lookup;
@@ -587,8 +630,7 @@ struct IrreducibleGraph {
   /// user of this.
   template <class BlockEdgesAdder>
   IrreducibleGraph(BFIBase &BFI, const BFIBase::LoopData *OuterLoop,
-                   BlockEdgesAdder addBlockEdges)
-      : BFI(BFI), StartIrr(nullptr) {
+                   BlockEdgesAdder addBlockEdges) : BFI(BFI) {
     initialize(OuterLoop, addBlockEdges);
   }

@@ -597,10 +639,12 @@ struct IrreducibleGraph {
                   BlockEdgesAdder addBlockEdges);
   void addNodesInLoop(const BFIBase::LoopData &OuterLoop);
   void addNodesInFunction();
+
   void addNode(const BlockNode &Node) {
     Nodes.emplace_back(Node);
     BFI.Working[Node.Index].getMass() = BlockMass::getEmpty();
   }
+
   void indexNodes();
   template <class BlockEdgesAdder>
   void addEdges(const BlockNode &Node, const BFIBase::LoopData *OuterLoop,
@@ -608,6 +652,7 @@ struct IrreducibleGraph {
   void addEdge(IrrNode &Irr, const BlockNode &Succ,
                const BFIBase::LoopData *OuterLoop);
 };
+
 template <class BlockEdgesAdder>
 void IrreducibleGraph::initialize(const BFIBase::LoopData *OuterLoop,
                                   BlockEdgesAdder addBlockEdges) {
@@ -622,6 +667,7 @@ void IrreducibleGraph::initialize(const BFIBase::LoopData *OuterLoop,
   }
   StartIrr = Lookup[Start.Index];
 }
+
 template <class BlockEdgesAdder>
 void IrreducibleGraph::addEdges(const BlockNode &Node,
                                 const BFIBase::LoopData *OuterLoop,
@@ -638,7 +684,8 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
   else
     addBlockEdges(*this, Irr, OuterLoop);
 }
-}
+
+} // end namespace bfi_detail

 /// \brief Shared implementation for block frequency analysis.
 ///
@@ -794,28 +841,27 @@ void IrreducibleGraph::addEdges(const BlockNode &Node,
 /// (Running this until fixed point would "solve" the geometric
 /// series by simulation.)
 template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
-  typedef typename bfi_detail::TypeMap<BT>::BlockT BlockT;
-  typedef typename bfi_detail::TypeMap<BT>::FunctionT FunctionT;
-  typedef typename bfi_detail::TypeMap<BT>::BranchProbabilityInfoT
-  BranchProbabilityInfoT;
-  typedef typename bfi_detail::TypeMap<BT>::LoopT LoopT;
-  typedef typename bfi_detail::TypeMap<BT>::LoopInfoT LoopInfoT;
-
   // This is part of a workaround for a GCC 4.7 crash on lambdas.
   friend struct bfi_detail::BlockEdgesAdder<BT>;

-  typedef GraphTraits<const BlockT *> Successor;
-  typedef GraphTraits<Inverse<const BlockT *>> Predecessor;
+  using BlockT = typename bfi_detail::TypeMap<BT>::BlockT;
+  using FunctionT = typename bfi_detail::TypeMap<BT>::FunctionT;
+  using BranchProbabilityInfoT =
+      typename bfi_detail::TypeMap<BT>::BranchProbabilityInfoT;
+  using LoopT = typename bfi_detail::TypeMap<BT>::LoopT;
+  using LoopInfoT = typename bfi_detail::TypeMap<BT>::LoopInfoT;
+  using Successor = GraphTraits<const BlockT *>;
+  using Predecessor = GraphTraits<Inverse<const BlockT *>>;

-  const BranchProbabilityInfoT *BPI;
-  const LoopInfoT *LI;
-  const FunctionT *F;
+  const BranchProbabilityInfoT *BPI = nullptr;
+  const LoopInfoT *LI = nullptr;
+  const FunctionT *F = nullptr;

   // All blocks in reverse postorder.
   std::vector<const BlockT *> RPOT;
   DenseMap<const BlockT *, BlockNode> Nodes;

-  typedef typename std::vector<const BlockT *>::const_iterator rpot_iterator;
+  using rpot_iterator = typename std::vector<const BlockT *>::const_iterator;

   rpot_iterator rpot_begin() const { return RPOT.begin(); }
   rpot_iterator rpot_end() const { return RPOT.end(); }
@@ -913,25 +959,35 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
   }

 public:
+  BlockFrequencyInfoImpl() = default;
+
   const FunctionT *getFunction() const { return F; }

   void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI,
                  const LoopInfoT &LI);
-  BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {}

   using BlockFrequencyInfoImplBase::getEntryFreq;
+
   BlockFrequency getBlockFreq(const BlockT *BB) const {
     return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB));
   }
+
   Optional<uint64_t> getBlockProfileCount(const Function &F,
                                           const BlockT *BB) const {
     return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB));
   }
+
   Optional<uint64_t> getProfileCountFromFreq(const Function &F,
                                              uint64_t Freq) const {
     return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq);
   }
+
+  bool isIrrLoopHeader(const BlockT *BB) {
+    return BlockFrequencyInfoImplBase::isIrrLoopHeader(getNode(BB));
+  }
+
   void setBlockFreq(const BlockT *BB, uint64_t Freq);
+
   Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
     return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB));
   }
@@ -950,9 +1006,10 @@ public:
   /// \a BlockFrequencyInfoImplBase::print() only knows reverse post-order, so
   /// we need to override it here.
   raw_ostream &print(raw_ostream &OS) const override;
-  using BlockFrequencyInfoImplBase::dump;

+  using BlockFrequencyInfoImplBase::dump;
   using BlockFrequencyInfoImplBase::printBlockFreq;
+
   raw_ostream &printBlockFreq(raw_ostream &OS, const BlockT *BB) const {
     return BlockFrequencyInfoImplBase::printBlockFreq(OS, getNode(BB));
   }
@@ -1096,17 +1153,59 @@ bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) {
   DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n");

   if (Loop.isIrreducible()) {
-    BlockMass Remaining = BlockMass::getFull();
+    DEBUG(dbgs() << "isIrreducible = true\n");
+    Distribution Dist;
+    unsigned NumHeadersWithWeight = 0;
+    Optional<uint64_t> MinHeaderWeight;
+    DenseSet<uint32_t> HeadersWithoutWeight;
+    HeadersWithoutWeight.reserve(Loop.NumHeaders);
     for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
-      auto &Mass = Working[Loop.Nodes[H].Index].getMass();
-      Mass = Remaining * BranchProbability(1, Loop.NumHeaders - H);
-      Remaining -= Mass;
+      auto &HeaderNode = Loop.Nodes[H];
+      const BlockT *Block = getBlock(HeaderNode);
+      IsIrrLoopHeader.set(Loop.Nodes[H].Index);
+      Optional<uint64_t> HeaderWeight = Block->getIrrLoopHeaderWeight();
+      if (!HeaderWeight) {
+        DEBUG(dbgs() << "Missing irr loop header metadata on "
+                     << getBlockName(HeaderNode) << "\n");
+        HeadersWithoutWeight.insert(H);
+        continue;
+      }
+      DEBUG(dbgs() << getBlockName(HeaderNode)
+                   << " has irr loop header weight " << HeaderWeight.getValue()
+                   << "\n");
+      NumHeadersWithWeight++;
+      uint64_t HeaderWeightValue = HeaderWeight.getValue();
+      if (!MinHeaderWeight || HeaderWeightValue < MinHeaderWeight)
+        MinHeaderWeight = HeaderWeightValue;
+      if (HeaderWeightValue) {
+        Dist.addLocal(HeaderNode, HeaderWeightValue);
+      }
     }
+    // As a heuristic, if some headers don't have a weight, give them the
+    // minimum weight seen (not to disrupt the existing trends too much by
+    // using a weight that's in the general range of the other headers'
+    // weights, and the minimum seems to perform better than the average.)
+    // FIXME: better update in the passes that drop the header weight.
+    // If no headers have a weight, give them even weight (use weight 1).
+    if (!MinHeaderWeight)
+      MinHeaderWeight = 1;
+    for (uint32_t H : HeadersWithoutWeight) {
+      auto &HeaderNode = Loop.Nodes[H];
+      assert(!getBlock(HeaderNode)->getIrrLoopHeaderWeight() &&
+             "Shouldn't have a weight metadata");
+      uint64_t MinWeight = MinHeaderWeight.getValue();
+      DEBUG(dbgs() << "Giving weight " << MinWeight
+                   << " to " << getBlockName(HeaderNode) << "\n");
+      if (MinWeight)
+        Dist.addLocal(HeaderNode, MinWeight);
+    }
+    distributeIrrLoopHeaderMass(Dist);
     for (const BlockNode &M : Loop.Nodes)
       if (!propagateMassToSuccessors(&Loop, M))
         llvm_unreachable("unhandled irreducible control flow");
-
-    adjustLoopHeaderMass(Loop);
+    if (NumHeadersWithWeight == 0)
+      // No headers have a metadata. Adjust header mass.
+      adjustLoopHeaderMass(Loop);
   } else {
     Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
     if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
@@ -1153,14 +1252,17 @@ template <class BT> void BlockFrequencyInfoImpl<BT>::computeMassInFunction() {

 /// \note This should be a lambda, but that crashes GCC 4.7.
 namespace bfi_detail {
+
 template <class BT> struct BlockEdgesAdder {
-  typedef BT BlockT;
-  typedef BlockFrequencyInfoImplBase::LoopData LoopData;
-  typedef GraphTraits<const BlockT *> Successor;
+  using BlockT = BT;
+  using LoopData = BlockFrequencyInfoImplBase::LoopData;
+  using Successor = GraphTraits<const BlockT *>;
+
   const BlockFrequencyInfoImpl<BT> &BFI;
+
   explicit BlockEdgesAdder(const BlockFrequencyInfoImpl<BT> &BFI)
       : BFI(BFI) {}
+
   void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr,
                   const LoopData *OuterLoop) {
     const BlockT *BB = BFI.RPOT[Irr.Node.Index];
@@ -1168,7 +1270,9 @@ template <class BT> struct BlockEdgesAdder {
       G.addEdge(Irr, BFI.getNode(Succ), OuterLoop);
   }
 };
-}
+
+} // end namespace bfi_detail
+
 template <class BT>
 void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass(
     LoopData *OuterLoop, std::list<LoopData>::iterator Insert) {
@@ -1177,6 +1281,7 @@ void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass(
        else dbgs() << "function\n");

   using namespace bfi_detail;
+
   // Ideally, addBlockEdges() would be declared here as a lambda, but that
   // crashes GCC 4.7.
   BlockEdgesAdder<BT> addBlockEdges(*this);
@@ -1209,9 +1314,12 @@ BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop,
       return false;
   } else {
     const BlockT *BB = getBlock(Node);
-    for (const auto Succ : children<const BlockT *>(BB))
-      if (!addToDist(Dist, OuterLoop, Node, getNode(Succ),
-                     getWeightFromBranchProb(BPI->getEdgeProbability(BB, Succ))))
+    for (auto SI = GraphTraits<const BlockT *>::child_begin(BB),
+              SE = GraphTraits<const BlockT *>::child_end(BB);
+         SI != SE; ++SI)
+      if (!addToDist(
+              Dist, OuterLoop, Node, getNode(*SI),
+              getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI))))
         // Irreducible backedge.
         return false;
   }
@@ -1230,7 +1338,15 @@ raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const {
   for (const BlockT &BB : *F) {
     OS << " - " << bfi_detail::getBlockName(&BB) << ": float = ";
     getFloatingBlockFreq(&BB).print(OS, 5)
-        << ", int = " << getBlockFreq(&BB).getFrequency() << "\n";
+        << ", int = " << getBlockFreq(&BB).getFrequency();
+    if (Optional<uint64_t> ProfileCount =
+        BlockFrequencyInfoImplBase::getBlockProfileCount(
+            F->getFunction(), getNode(&BB)))
+      OS << ", count = " << ProfileCount.getValue();
+    if (Optional<uint64_t> IrrLoopHeaderWeight =
+        BB.getIrrLoopHeaderWeight())
+      OS << ", irr_loop_header_weight = " << IrrLoopHeaderWeight.getValue();
+    OS << "\n";
   }

   // Add an extra newline for readability.
@@ -1245,15 +1361,16 @@ enum GVDAGType { GVDT_None, GVDT_Fraction, GVDT_Integer, GVDT_Count };

 template <class BlockFrequencyInfoT, class BranchProbabilityInfoT>
 struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
+  using GTraits = GraphTraits<BlockFrequencyInfoT *>;
+  using NodeRef = typename GTraits::NodeRef;
+  using EdgeIter = typename GTraits::ChildIteratorType;
+  using NodeIter = typename GTraits::nodes_iterator;
+
+  uint64_t MaxFrequency = 0;
+
   explicit BFIDOTGraphTraitsBase(bool isSimple = false)
       : DefaultDOTGraphTraits(isSimple) {}

-  typedef GraphTraits<BlockFrequencyInfoT *> GTraits;
-  typedef typename GTraits::NodeRef NodeRef;
-  typedef typename GTraits::ChildIteratorType EdgeIter;
-  typedef typename GTraits::nodes_iterator NodeIter;
-
-  uint64_t MaxFrequency = 0;
-
   static std::string getGraphName(const BlockFrequencyInfoT *G) {
     return G->getFunction()->getName();
   }
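The irreducible-loop handling above replaces the old even split of BlockMass among headers with a weighted Distribution built from irr_loop header metadata. The saturating BlockMass arithmetic it relies on can be illustrated in isolation (a self-contained re-implementation of the operator+= shown earlier, not the LLVM class itself):

    #include <cstdint>
    #include <limits>

    struct Mass {
      uint64_t M = 0;
      // Adds another mass, saturating at "full" (1.0) rather than wrapping
      // around on unsigned overflow.
      Mass &operator+=(Mass X) {
        uint64_t Sum = M + X.M;
        M = Sum < M ? std::numeric_limits<uint64_t>::max() : Sum;
        return *this;
      }
    };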
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 94d3d4de6c9d..417b64978811 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -1,4 +1,4 @@
-//===--- BranchProbabilityInfo.h - Branch Probability Analysis --*- C++ -*-===//
+//===- BranchProbabilityInfo.h - Branch Probability Analysis ----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -15,19 +15,28 @@
 #define LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H

 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>

 namespace llvm {

+class Function;
 class LoopInfo;
-class TargetLibraryInfo;
 class raw_ostream;
+class TargetLibraryInfo;
+class Value;

 /// \brief Analysis providing branch probability information.
 ///
@@ -43,7 +52,8 @@ class raw_ostream;
 /// value 10.
 class BranchProbabilityInfo {
 public:
-  BranchProbabilityInfo() {}
+  BranchProbabilityInfo() = default;
+
   BranchProbabilityInfo(const Function &F, const LoopInfo &LI,
                         const TargetLibraryInfo *TLI = nullptr) {
     calculate(F, LI, TLI);
@@ -54,6 +64,9 @@ public:
         PostDominatedByUnreachable(std::move(Arg.PostDominatedByUnreachable)),
         PostDominatedByColdCall(std::move(Arg.PostDominatedByColdCall)) {}

+  BranchProbabilityInfo(const BranchProbabilityInfo &) = delete;
+  BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete;
+
   BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) {
     releaseMemory();
     Probs = std::move(RHS.Probs);
@@ -124,14 +137,21 @@ public:
   /// Forget analysis results for the given basic block.
   void eraseBlock(const BasicBlock *BB);

-private:
-  void operator=(const BranchProbabilityInfo &) = delete;
-  BranchProbabilityInfo(const BranchProbabilityInfo &) = delete;
+  // Use to track SCCs for handling irreducible loops.
+  using SccMap = DenseMap<const BasicBlock *, int>;
+  using SccHeaderMap = DenseMap<const BasicBlock *, bool>;
+  using SccHeaderMaps = std::vector<SccHeaderMap>;
+  struct SccInfo {
+    SccMap SccNums;
+    SccHeaderMaps SccHeaders;
+  };

+private:
   // We need to store CallbackVH's in order to correctly handle basic block
   // removal.
   class BasicBlockCallbackVH final : public CallbackVH {
     BranchProbabilityInfo *BPI;
+
     void deleted() override {
       assert(BPI != nullptr);
       BPI->eraseBlock(cast<BasicBlock>(getValPtr()));
@@ -139,14 +159,15 @@ private:
     }

   public:
-    BasicBlockCallbackVH(const Value *V, BranchProbabilityInfo *BPI=nullptr)
+    BasicBlockCallbackVH(const Value *V, BranchProbabilityInfo *BPI = nullptr)
         : CallbackVH(const_cast<Value *>(V)), BPI(BPI) {}
   };
+
   DenseSet<BasicBlockCallbackVH, DenseMapInfo<Value*>> Handles;

   // Since we allow duplicate edges from one basic block to another, we use
   // a pair (PredBlock and an index in the successors) to specify an edge.
-  typedef std::pair<const BasicBlock *, unsigned> Edge;
+  using Edge = std::pair<const BasicBlock *, unsigned>;

   // Default weight value. Used when we don't have information about the edge.
   // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of
@@ -173,7 +194,8 @@ private:
   bool calcMetadataWeights(const BasicBlock *BB);
   bool calcColdCallHeuristics(const BasicBlock *BB);
   bool calcPointerHeuristics(const BasicBlock *BB);
-  bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI);
+  bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI,
+                                SccInfo &SccI);
   bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI);
   bool calcFloatingPointHeuristics(const BasicBlock *BB);
   bool calcInvokeHeuristics(const BasicBlock *BB);
@@ -183,11 +205,12 @@ class BranchProbabilityAnalysis
     : public AnalysisInfoMixin<BranchProbabilityAnalysis> {
   friend AnalysisInfoMixin<BranchProbabilityAnalysis>;
+
   static AnalysisKey Key;

 public:
-  /// \brief Provide the result typedef for this analysis pass.
-  typedef BranchProbabilityInfo Result;
+  /// \brief Provide the result type for this analysis pass.
+  using Result = BranchProbabilityInfo;

   /// \brief Run the analysis pass over a function and produce BPI.
   BranchProbabilityInfo run(Function &F, FunctionAnalysisManager &AM);
@@ -200,6 +223,7 @@ class BranchProbabilityPrinterPass
 public:
   explicit BranchProbabilityPrinterPass(raw_ostream &OS) : OS(OS) {}
+
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };

@@ -224,6 +248,6 @@ public:
   void print(raw_ostream &OS, const Module *M = nullptr) const override;
 };

-}
+} // end namespace llvm

-#endif
+#endif // LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H
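calcLoopBranchHeuristics() now threads an SccInfo through so blocks in irreducible loops, found via SCCs, can receive loop-style probabilities. A sketch of how the now-public SccInfo maps might be consulted (the lookup helper is hypothetical; the maps are populated in BranchProbabilityInfo.cpp):

    #include "llvm/Analysis/BranchProbabilityInfo.h"
    using namespace llvm;

    // Return the SCC number for BB, or -1 if BB is not in a non-trivial SCC.
    static int getSccNum(const BranchProbabilityInfo::SccInfo &SccI,
                         const BasicBlock *BB) {
      auto It = SccI.SccNums.find(BB);
      return It == SccI.SccNums.end() ? -1 : It->second;
    }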
DT is most useful /// on branchy code but not loops, and LI is most useful on code with loops but diff --git a/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/include/llvm/Analysis/CFLAndersAliasAnalysis.h index 4146ad4d18ac..6239d5309581 100644 --- a/include/llvm/Analysis/CFLAndersAliasAnalysis.h +++ b/include/llvm/Analysis/CFLAndersAliasAnalysis.h @@ -1,4 +1,4 @@ -//=- CFLAndersAliasAnalysis.h - Unification-based Alias Analysis ---*- C++-*-=// +//==- CFLAndersAliasAnalysis.h - Unification-based Alias Analysis -*- C++-*-==// // // The LLVM Compiler Infrastructure // @@ -19,25 +19,31 @@ #include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFLAliasAnalysisUtils.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include <forward_list> +#include <memory> namespace llvm { +class Function; +class MemoryLocation; class TargetLibraryInfo; namespace cflaa { + struct AliasSummary; -} + +} // end namespace cflaa class CFLAndersAAResult : public AAResultBase<CFLAndersAAResult> { friend AAResultBase<CFLAndersAAResult>; + class FunctionInfo; public: - explicit CFLAndersAAResult(const TargetLibraryInfo &); - CFLAndersAAResult(CFLAndersAAResult &&); + explicit CFLAndersAAResult(const TargetLibraryInfo &TLI); + CFLAndersAAResult(CFLAndersAAResult &&RHS); ~CFLAndersAAResult(); /// Handle invalidation events from the new pass manager. @@ -46,6 +52,7 @@ public: FunctionAnalysisManager::Invalidator &) { return false; } + /// Evict the given function from cache void evict(const Function *Fn); @@ -85,10 +92,11 @@ private: /// in particular to leverage invalidation to trigger re-computation. class CFLAndersAA : public AnalysisInfoMixin<CFLAndersAA> { friend AnalysisInfoMixin<CFLAndersAA>; + static AnalysisKey Key; public: - typedef CFLAndersAAResult Result; + using Result = CFLAndersAAResult; CFLAndersAAResult run(Function &F, FunctionAnalysisManager &AM); }; @@ -109,12 +117,10 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; }; -//===--------------------------------------------------------------------===// -// // createCFLAndersAAWrapperPass - This pass implements a set-based approach to // alias analysis. 
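To make the CFG.h cost note above concrete, here is a hedged usage sketch; it assumes the standard llvm::isPotentiallyReachable overload that takes optional DominatorTree and LoopInfo pointers:

    #include "llvm/Analysis/CFG.h"
    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/IR/Dominators.h"

    // Passing both analyses is cheapest: LI collapses an entire loop to the
    // cost of a single block, and DT lets the walk stop at a dominator of To.
    bool mayReach(const llvm::BasicBlock *From, const llvm::BasicBlock *To,
                  const llvm::DominatorTree *DT, const llvm::LoopInfo *LI) {
      return llvm::isPotentiallyReachable(From, To, DT, LI);
    }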
-// ImmutablePass *createCFLAndersAAWrapperPass(); -} -#endif +} // end namespace llvm + +#endif // LLVM_ANALYSIS_CFLANDERSALIASANALYSIS_H diff --git a/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/include/llvm/Analysis/CFLSteensAliasAnalysis.h index fd3fa5febcdf..ee9e29046af8 100644 --- a/include/llvm/Analysis/CFLSteensAliasAnalysis.h +++ b/include/llvm/Analysis/CFLSteensAliasAnalysis.h @@ -1,4 +1,4 @@ -//=- CFLSteensAliasAnalysis.h - Unification-based Alias Analysis ---*- C++-*-=// +//==- CFLSteensAliasAnalysis.h - Unification-based Alias Analysis -*- C++-*-==// // // The LLVM Compiler Infrastructure // @@ -16,30 +16,34 @@ #define LLVM_ANALYSIS_CFLSTEENSALIASANALYSIS_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFLAliasAnalysisUtils.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/ValueHandle.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include <forward_list> +#include <memory> namespace llvm { +class Function; class TargetLibraryInfo; namespace cflaa { + struct AliasSummary; -} + +} // end namespace cflaa class CFLSteensAAResult : public AAResultBase<CFLSteensAAResult> { friend AAResultBase<CFLSteensAAResult>; + class FunctionInfo; public: - explicit CFLSteensAAResult(const TargetLibraryInfo &); + explicit CFLSteensAAResult(const TargetLibraryInfo &TLI); CFLSteensAAResult(CFLSteensAAResult &&Arg); ~CFLSteensAAResult(); @@ -68,7 +72,7 @@ public: AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { if (LocA.Ptr == LocB.Ptr) - return LocA.Size == LocB.Size ? MustAlias : PartialAlias; + return MustAlias; // Comparisons between global variables and other constants should be // handled by BasicAA. @@ -105,10 +109,11 @@ private: /// in particular to leverage invalidation to trigger re-computation of sets. class CFLSteensAA : public AnalysisInfoMixin<CFLSteensAA> { friend AnalysisInfoMixin<CFLSteensAA>; + static AnalysisKey Key; public: - typedef CFLSteensAAResult Result; + using Result = CFLSteensAAResult; CFLSteensAAResult run(Function &F, FunctionAnalysisManager &AM); }; @@ -129,12 +134,10 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; }; -//===--------------------------------------------------------------------===// -// // createCFLSteensAAWrapperPass - This pass implements a set-based approach to // alias analysis. 
-// ImmutablePass *createCFLSteensAAWrapperPass(); -} -#endif +} // end namespace llvm + +#endif // LLVM_ANALYSIS_CFLSTEENSALIASANALYSIS_H diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index 32868cbecdcf..8123cbad22ff 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -89,29 +89,44 @@ #ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H #define LLVM_ANALYSIS_CGSCCPASSMANAGER_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PriorityWorklist.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <utility> namespace llvm { struct CGSCCUpdateResult; +class Module; + +// Allow debug logging in this inline function. +#define DEBUG_TYPE "cgscc" /// Extern template declaration for the analysis set for this IR unit. extern template class AllAnalysesOn<LazyCallGraph::SCC>; extern template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; + /// \brief The CGSCC analysis manager. /// /// See the documentation for the AnalysisManager template for detail -/// documentation. This typedef serves as a convenient way to refer to this +/// documentation. This type serves as a convenient way to refer to this /// construct in the adaptors and proxies used to integrate this into the larger /// pass manager infrastructure. -typedef AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &> - CGSCCAnalysisManager; +using CGSCCAnalysisManager = + AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; // Explicit specialization and instantiation declarations for the pass manager. // See the comments on the definition of the specialization for details on how @@ -129,10 +144,10 @@ extern template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, /// /// See the documentation for the PassManager template for details. It runs /// a sequence of SCC passes over each SCC that the manager is run over. This -/// typedef serves as a convenient way to refer to this construct. -typedef PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, - CGSCCUpdateResult &> - CGSCCPassManager; +/// type serves as a convenient way to refer to this construct. +using CGSCCPassManager = + PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, + CGSCCUpdateResult &>; /// An explicit specialization of the require analysis template pass. template <typename AnalysisT> @@ -149,8 +164,8 @@ struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager, }; /// A proxy from a \c CGSCCAnalysisManager to a \c Module. 
-typedef InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module> - CGSCCAnalysisManagerModuleProxy; +using CGSCCAnalysisManagerModuleProxy = + InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; /// We need a specialized result for the \c CGSCCAnalysisManagerModuleProxy so /// it can have access to the call graph in order to walk all the SCCs when /// invalidating things. @@ -193,10 +208,11 @@ extern template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; extern template class OuterAnalysisManagerProxy< ModuleAnalysisManager, LazyCallGraph::SCC, LazyCallGraph &>; + /// A proxy from a \c ModuleAnalysisManager to an \c SCC. -typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC, - LazyCallGraph &> - ModuleAnalysisManagerCGSCCProxy; +using ModuleAnalysisManagerCGSCCProxy = + OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC, + LazyCallGraph &>; /// Support structure for SCC passes to communicate updates to the call graph back /// to the CGSCC pass manager infrastructure. @@ -275,6 +291,15 @@ struct CGSCCUpdateResult { /// non-null and can be used to continue processing the "top" of the /// post-order walk. LazyCallGraph::SCC *UpdatedC; + + /// A hacky area where the inliner can retain history about inlining + /// decisions that mutated the call graph's SCC structure in order to avoid + /// infinite inlining. See the comments in the inliner's CG update logic. + /// + /// FIXME: Keeping this here seems like a big layering issue; we should look + /// for a better technique. + SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> + &InlinedInternalEdges; }; /// \brief The core module pass which does a post-order walk of the SCCs and @@ -290,21 +315,23 @@ template <typename CGSCCPassT> class ModuleToPostOrderCGSCCPassAdaptor : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>> { public: - explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass, bool DebugLogging = false) - : Pass(std::move(Pass)), DebugLogging(DebugLogging) {} + explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) + : Pass(std::move(Pass)) {} + // We have to explicitly define all the special member functions because MSVC // refuses to generate them.
ModuleToPostOrderCGSCCPassAdaptor( const ModuleToPostOrderCGSCCPassAdaptor &Arg) - : Pass(Arg.Pass), DebugLogging(Arg.DebugLogging) {} + : Pass(Arg.Pass) {} + ModuleToPostOrderCGSCCPassAdaptor(ModuleToPostOrderCGSCCPassAdaptor &&Arg) - : Pass(std::move(Arg.Pass)), DebugLogging(Arg.DebugLogging) {} + : Pass(std::move(Arg.Pass)) {} + friend void swap(ModuleToPostOrderCGSCCPassAdaptor &LHS, ModuleToPostOrderCGSCCPassAdaptor &RHS) { - using std::swap; - swap(LHS.Pass, RHS.Pass); - swap(LHS.DebugLogging, RHS.DebugLogging); + std::swap(LHS.Pass, RHS.Pass); } + ModuleToPostOrderCGSCCPassAdaptor & operator=(ModuleToPostOrderCGSCCPassAdaptor RHS) { swap(*this, RHS); @@ -330,8 +357,12 @@ public: SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet; SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet; - CGSCCUpdateResult UR = {RCWorklist, CWorklist, InvalidRefSCCSet, - InvalidSCCSet, nullptr, nullptr}; + SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> + InlinedInternalEdges; + + CGSCCUpdateResult UR = {RCWorklist, CWorklist, InvalidRefSCCSet, + InvalidSCCSet, nullptr, nullptr, + InlinedInternalEdges}; PreservedAnalyses PA = PreservedAnalyses::all(); CG.buildRefSCCs(); @@ -356,20 +387,19 @@ public: do { LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val(); if (InvalidRefSCCSet.count(RC)) { - if (DebugLogging) - dbgs() << "Skipping an invalid RefSCC...\n"; + DEBUG(dbgs() << "Skipping an invalid RefSCC...\n"); continue; } assert(CWorklist.empty() && "Should always start with an empty SCC worklist"); - if (DebugLogging) - dbgs() << "Running an SCC pass across the RefSCC: " << *RC << "\n"; + DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC + << "\n"); // Push the initial SCCs in reverse post-order as we'll pop off the // back and so see this in post-order. - for (LazyCallGraph::SCC &C : reverse(*RC)) + for (LazyCallGraph::SCC &C : llvm::reverse(*RC)) CWorklist.insert(&C); do { @@ -379,14 +409,12 @@ public: // other RefSCCs should be queued above, so we just need to skip both // scenarios here. if (InvalidSCCSet.count(C)) { - if (DebugLogging) - dbgs() << "Skipping an invalid SCC...\n"; + DEBUG(dbgs() << "Skipping an invalid SCC...\n"); continue; } if (&C->getOuterRefSCC() != RC) { - if (DebugLogging) - dbgs() << "Skipping an SCC that is now part of some other " - "RefSCC...\n"; + DEBUG(dbgs() << "Skipping an SCC that is now part of some other " + "RefSCC...\n"); continue; } @@ -401,13 +429,26 @@ public: UR.UpdatedC = nullptr; PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR); + // Update the SCC and RefSCC if necessary. + C = UR.UpdatedC ? UR.UpdatedC : C; + RC = UR.UpdatedRC ? UR.UpdatedRC : RC; + + // If the CGSCC pass wasn't able to provide a valid updated SCC, + // the current SCC may simply need to be skipped if invalid. + if (UR.InvalidatedSCCs.count(C)) { + DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n"); + break; + } + // Check that we didn't miss any update scenario. + assert(C->begin() != C->end() && "Cannot have an empty SCC!"); + // We handle invalidating the CGSCC analysis manager's information // for the (potentially updated) SCC here. Note that any other SCCs // whose structure has changed should have been invalidated by // whatever was updating the call graph. This SCC gets invalidated // late as it contains the nodes that were actively being // processed. - CGAM.invalidate(*(UR.UpdatedC ?
UR.UpdatedC : C), PassPA); + CGAM.invalidate(*C, PassPA); // Then intersect the preserved set so that invalidation of module // analyses will eventually occur when the module pass completes. @@ -422,19 +463,21 @@ public: // apart, at most converging on a DAG of single nodes. // FIXME: If we ever start having RefSCC passes, we'll want to // iterate there too. - RC = UR.UpdatedRC ? UR.UpdatedRC : RC; - C = UR.UpdatedC ? UR.UpdatedC : C; - if (DebugLogging && UR.UpdatedC) - dbgs() << "Re-running SCC passes after a refinement of the " - "current SCC: " - << *UR.UpdatedC << "\n"; + if (UR.UpdatedC) + DEBUG(dbgs() << "Re-running SCC passes after a refinement of the " + "current SCC: " + << *UR.UpdatedC << "\n"); // Note that both `C` and `RC` may at this point refer to deleted, // invalid SCC and RefSCCs respectively. But we will short circuit // the processing when we check them in the loop above. } while (UR.UpdatedC); - } while (!CWorklist.empty()); + + // We only need to keep internal inlined edge information within + // a RefSCC, clear it to save on space and let the next time we visit + // any of these functions have a fresh start. + InlinedInternalEdges.clear(); } while (!RCWorklist.empty()); } @@ -449,15 +492,14 @@ public: private: CGSCCPassT Pass; - bool DebugLogging; }; /// \brief A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename CGSCCPassT> ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT> -createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass, bool DebugLogging = false) { - return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass), DebugLogging); +createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) { + return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass)); } /// A proxy from a \c FunctionAnalysisManager to an \c SCC. @@ -490,13 +532,15 @@ public: private: friend AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy>; + static AnalysisKey Key; }; extern template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; + /// A proxy from a \c CGSCCAnalysisManager to a \c Function. -typedef OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function> - CGSCCAnalysisManagerFunctionProxy; +using CGSCCAnalysisManagerFunctionProxy = + OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; /// Helper to update the call graph after running a function pass. /// @@ -506,7 +550,7 @@ typedef OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function> /// update result struct for the overall CGSCC walk. LazyCallGraph::SCC &updateCGAndAnalysisManagerForFunctionPass( LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, - CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging = false); + CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR); /// \brief Adaptor that maps from a SCC to its functions. /// @@ -520,20 +564,22 @@ template <typename FunctionPassT> class CGSCCToFunctionPassAdaptor : public PassInfoMixin<CGSCCToFunctionPassAdaptor<FunctionPassT>> { public: - explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass, bool DebugLogging = false) - : Pass(std::move(Pass)), DebugLogging(DebugLogging) {} + explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass) + : Pass(std::move(Pass)) {} + // We have to explicitly define all the special member functions because MSVC // refuses to generate them. 
CGSCCToFunctionPassAdaptor(const CGSCCToFunctionPassAdaptor &Arg) - : Pass(Arg.Pass), DebugLogging(Arg.DebugLogging) {} + : Pass(Arg.Pass) {} + CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg) - : Pass(std::move(Arg.Pass)), DebugLogging(Arg.DebugLogging) {} + : Pass(std::move(Arg.Pass)) {} + friend void swap(CGSCCToFunctionPassAdaptor &LHS, CGSCCToFunctionPassAdaptor &RHS) { - using std::swap; - swap(LHS.Pass, RHS.Pass); - swap(LHS.DebugLogging, RHS.DebugLogging); + std::swap(LHS.Pass, RHS.Pass); } + CGSCCToFunctionPassAdaptor &operator=(CGSCCToFunctionPassAdaptor RHS) { swap(*this, RHS); return *this; @@ -555,8 +601,7 @@ public: // a pointer we can overwrite. LazyCallGraph::SCC *CurrentC = &C; - if (DebugLogging) - dbgs() << "Running function passes across an SCC: " << C << "\n"; + DEBUG(dbgs() << "Running function passes across an SCC: " << C << "\n"); PreservedAnalyses PA = PreservedAnalyses::all(); for (LazyCallGraph::Node *N : Nodes) { @@ -582,8 +627,8 @@ public: // a smaller, more refined SCC. auto PAC = PA.getChecker<LazyCallGraphAnalysis>(); if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) { - CurrentC = &updateCGAndAnalysisManagerForFunctionPass( - CG, *CurrentC, *N, AM, UR, DebugLogging); + CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N, + AM, UR); assert( CG.lookupSCC(*N) == CurrentC && "Current SCC not updated to the SCC containing the current node!"); @@ -605,16 +650,14 @@ public: private: FunctionPassT Pass; - bool DebugLogging; }; /// \brief A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename FunctionPassT> CGSCCToFunctionPassAdaptor<FunctionPassT> -createCGSCCToFunctionPassAdaptor(FunctionPassT Pass, bool DebugLogging = false) { - return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass), - DebugLogging); +createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) { + return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass)); } /// A helper that repeats an SCC pass each time an indirect call is refined to @@ -635,10 +678,8 @@ template <typename PassT> class DevirtSCCRepeatedPass : public PassInfoMixin<DevirtSCCRepeatedPass<PassT>> { public: - explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations, - bool DebugLogging = false) - : Pass(std::move(Pass)), MaxIterations(MaxIterations), - DebugLogging(DebugLogging) {} + explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations) + : Pass(std::move(Pass)), MaxIterations(MaxIterations) {} /// Runs the wrapped pass up to \c MaxIterations on the SCC, iterating /// whenever an indirect call is refined. @@ -716,16 +757,15 @@ public: if (!F) return false; - if (DebugLogging) - dbgs() << "Found devirutalized call from " - << CS.getParent()->getParent()->getName() << " to " - << F->getName() << "\n"; + DEBUG(dbgs() << "Found devirtualized call from " + << CS.getParent()->getParent()->getName() << " to " + << F->getName() << "\n"); // We now have a direct call where previously we had an indirect call, // so iterate to process this devirtualization site. return true; }; - bool Devirt = any_of(CallHandles, IsDevirtualizedHandle); + bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle); // Rescan to build up a new set of handles and count how many direct // calls remain. If we decide to iterate, this also sets up the input to @@ -753,17 +793,16 @@ public: // Otherwise, if we've already hit our max, we're done.
if (Iteration >= MaxIterations) { - if (DebugLogging) - dbgs() << "Found another devirtualization after hitting the max " - "number of repetitions (" - << MaxIterations << ") on SCC: " << *C << "\n"; + DEBUG(dbgs() << "Found another devirtualization after hitting the max " + "number of repetitions (" + << MaxIterations << ") on SCC: " << *C << "\n"); PA.intersect(std::move(PassPA)); break; } - if (DebugLogging) - dbgs() << "Repeating an SCC pass after finding a devirtualization in: " - << *C << "\n"; + DEBUG(dbgs() + << "Repeating an SCC pass after finding a devirtualization in: " + << *C << "\n"); // Move over the new call counts in preparation for iterating. CallCounts = std::move(NewCallCounts); @@ -783,18 +822,19 @@ public: private: PassT Pass; int MaxIterations; - bool DebugLogging; }; /// \brief A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename PassT> -DevirtSCCRepeatedPass<PassT> -createDevirtSCCRepeatedPass(PassT Pass, int MaxIterations, - bool DebugLogging = false) { - return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations, - DebugLogging); -} +DevirtSCCRepeatedPass<PassT> createDevirtSCCRepeatedPass(PassT Pass, + int MaxIterations) { + return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations); } -#endif +// Clear out the debug logging macro. +#undef DEBUG_TYPE + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_CGSCCPASSMANAGER_H diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h index 01469a25c96c..c5687def3ebe 100644 --- a/include/llvm/Analysis/CallGraph.h +++ b/include/llvm/Analysis/CallGraph.h @@ -54,13 +54,17 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" +#include <cassert> #include <map> +#include <memory> +#include <utility> +#include <vector> namespace llvm { -class Function; -class Module; class CallGraphNode; +class Module; +class raw_ostream; /// \brief The basic data container for the call graph of a \c Module of IR. /// @@ -70,8 +74,8 @@ class CallGraphNode; class CallGraph { Module &M; - typedef std::map<const Function *, std::unique_ptr<CallGraphNode>> - FunctionMapTy; + using FunctionMapTy = + std::map<const Function *, std::unique_ptr<CallGraphNode>>; /// \brief A map from \c Function* to \c CallGraphNode*. FunctionMapTy FunctionMap; @@ -103,8 +107,8 @@ public: void print(raw_ostream &OS) const; void dump() const; - typedef FunctionMapTy::iterator iterator; - typedef FunctionMapTy::const_iterator const_iterator; + using iterator = FunctionMapTy::iterator; + using const_iterator = FunctionMapTy::const_iterator; /// \brief Returns the module the call graph corresponds to. Module &getModule() const { return M; } @@ -162,20 +166,23 @@ class CallGraphNode { public: /// \brief A pair of the calling instruction (a call or invoke) /// and the call graph node being called. - typedef std::pair<WeakTrackingVH, CallGraphNode *> CallRecord; + using CallRecord = std::pair<WeakTrackingVH, CallGraphNode *>; public: - typedef std::vector<CallRecord> CalledFunctionsVector; + using CalledFunctionsVector = std::vector<CallRecord>; /// \brief Creates a node for the specified function. 
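With the DebugLogging parameters removed from the adaptor factories in CGSCCPassManager.h above (logging now flows through DEBUG() under -debug-only=cgscc), call sites simply drop the flag. A sketch under that assumption; NoopFunctionPass is a hypothetical placeholder for a real function pass:

    #include "llvm/Analysis/CGSCCPassManager.h"

    struct NoopFunctionPass : llvm::PassInfoMixin<NoopFunctionPass> {
      llvm::PreservedAnalyses run(llvm::Function &,
                                  llvm::FunctionAnalysisManager &) {
        return llvm::PreservedAnalyses::all();
      }
    };

    void buildPipeline(llvm::ModulePassManager &MPM) {
      llvm::CGSCCPassManager CGPM;
      // Run the function pass over every function of each SCC...
      CGPM.addPass(llvm::createCGSCCToFunctionPassAdaptor(NoopFunctionPass()));
      // ...and drive the whole CGSCC pipeline across the module in post-order.
      MPM.addPass(llvm::createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
    }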
- inline CallGraphNode(Function *F) : F(F), NumReferences(0) {} + inline CallGraphNode(Function *F) : F(F) {} + + CallGraphNode(const CallGraphNode &) = delete; + CallGraphNode &operator=(const CallGraphNode &) = delete; ~CallGraphNode() { assert(NumReferences == 0 && "Node deleted while references remain"); } - typedef std::vector<CallRecord>::iterator iterator; - typedef std::vector<CallRecord>::const_iterator const_iterator; + using iterator = std::vector<CallRecord>::iterator; + using const_iterator = std::vector<CallRecord>::const_iterator; /// \brief Returns the function that this call graph node represents. Function *getFunction() const { return F; } @@ -268,10 +275,7 @@ private: /// \brief The number of times that this CallGraphNode occurs in the /// CalledFunctions array of this or other CallGraphNodes. - unsigned NumReferences; - - CallGraphNode(const CallGraphNode &) = delete; - void operator=(const CallGraphNode &) = delete; + unsigned NumReferences = 0; void DropRef() { --NumReferences; } void AddRef() { ++NumReferences; } @@ -287,11 +291,12 @@ private: /// resulting data. class CallGraphAnalysis : public AnalysisInfoMixin<CallGraphAnalysis> { friend AnalysisInfoMixin<CallGraphAnalysis>; + static AnalysisKey Key; public: - /// \brief A formulaic typedef to inform clients of the result type. - typedef CallGraph Result; + /// \brief A formulaic type to inform clients of the result type. + using Result = CallGraph; /// \brief Compute the \c CallGraph for the module \c M. /// @@ -305,6 +310,7 @@ class CallGraphPrinterPass : public PassInfoMixin<CallGraphPrinterPass> { public: explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; @@ -329,8 +335,8 @@ public: const CallGraph &getCallGraph() const { return *G; } CallGraph &getCallGraph() { return *G; } - typedef CallGraph::iterator iterator; - typedef CallGraph::const_iterator const_iterator; + using iterator = CallGraph::iterator; + using const_iterator = CallGraph::const_iterator; /// \brief Returns the module the call graph corresponds to. Module &getModule() const { return G->getModule(); } @@ -399,40 +405,38 @@ public: // Provide graph traits for traversing call graphs using standard graph // traversals.
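The GraphTraits specializations that follow plug the call graph into LLVM's generic graph algorithms; for example (a hedged sketch, assuming llvm/ADT/PostOrderIterator.h):

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/Analysis/CallGraph.h"

    // Visit nodes bottom-up; the entry node designated by
    // GraphTraits<CallGraph *> is the external calling node.
    void visitPostOrder(llvm::CallGraph &CG) {
      for (llvm::CallGraphNode *CGN : llvm::post_order(&CG))
        if (llvm::Function *F = CGN->getFunction())
          (void)F; // Process F here; getFunction() is null for external nodes.
    }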
template <> struct GraphTraits<CallGraphNode *> { - typedef CallGraphNode *NodeRef; - - typedef CallGraphNode::CallRecord CGNPairTy; + using NodeRef = CallGraphNode *; + using CGNPairTy = CallGraphNode::CallRecord; static NodeRef getEntryNode(CallGraphNode *CGN) { return CGN; } - static CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } - typedef mapped_iterator<CallGraphNode::iterator, decltype(&CGNGetValue)> - ChildIteratorType; + using ChildIteratorType = + mapped_iterator<CallGraphNode::iterator, decltype(&CGNGetValue)>; static ChildIteratorType child_begin(NodeRef N) { return ChildIteratorType(N->begin(), &CGNGetValue); } + static ChildIteratorType child_end(NodeRef N) { return ChildIteratorType(N->end(), &CGNGetValue); } }; template <> struct GraphTraits<const CallGraphNode *> { - typedef const CallGraphNode *NodeRef; - - typedef CallGraphNode::CallRecord CGNPairTy; + using NodeRef = const CallGraphNode *; + using CGNPairTy = CallGraphNode::CallRecord; static NodeRef getEntryNode(const CallGraphNode *CGN) { return CGN; } - static const CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; } - typedef mapped_iterator<CallGraphNode::const_iterator, decltype(&CGNGetValue)> - ChildIteratorType; + using ChildIteratorType = + mapped_iterator<CallGraphNode::const_iterator, decltype(&CGNGetValue)>; static ChildIteratorType child_begin(NodeRef N) { return ChildIteratorType(N->begin(), &CGNGetValue); } + static ChildIteratorType child_end(NodeRef N) { return ChildIteratorType(N->end(), &CGNGetValue); } @@ -440,21 +444,25 @@ template <> struct GraphTraits<const CallGraphNode *> { template <> struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { + using PairTy = + std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; + static NodeRef getEntryNode(CallGraph *CGN) { return CGN->getExternalCallingNode(); // Start at the external node! } - typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>> - PairTy; + static CallGraphNode *CGGetValuePtr(const PairTy &P) { return P.second.get(); } // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - typedef mapped_iterator<CallGraph::iterator, decltype(&CGGetValuePtr)> - nodes_iterator; + using nodes_iterator = + mapped_iterator<CallGraph::iterator, decltype(&CGGetValuePtr)>; + static nodes_iterator nodes_begin(CallGraph *CG) { return nodes_iterator(CG->begin(), &CGGetValuePtr); } + static nodes_iterator nodes_end(CallGraph *CG) { return nodes_iterator(CG->end(), &CGGetValuePtr); } @@ -463,26 +471,30 @@ struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { template <> struct GraphTraits<const CallGraph *> : public GraphTraits< const CallGraphNode *> { + using PairTy = + std::pair<const Function *const, std::unique_ptr<CallGraphNode>>; + static NodeRef getEntryNode(const CallGraph *CGN) { return CGN->getExternalCallingNode(); // Start at the external node! 
} - typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>> - PairTy; + static const CallGraphNode *CGGetValuePtr(const PairTy &P) { return P.second.get(); } // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - typedef mapped_iterator<CallGraph::const_iterator, decltype(&CGGetValuePtr)> - nodes_iterator; + using nodes_iterator = + mapped_iterator<CallGraph::const_iterator, decltype(&CGGetValuePtr)>; + static nodes_iterator nodes_begin(const CallGraph *CG) { return nodes_iterator(CG->begin(), &CGGetValuePtr); } + static nodes_iterator nodes_end(const CallGraph *CG) { return nodes_iterator(CG->end(), &CGGetValuePtr); } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_CALLGRAPH_H diff --git a/include/llvm/Analysis/CallGraphSCCPass.h b/include/llvm/Analysis/CallGraphSCCPass.h index f86f64bbb67d..ace54607634c 100644 --- a/include/llvm/Analysis/CallGraphSCCPass.h +++ b/include/llvm/Analysis/CallGraphSCCPass.h @@ -21,16 +21,16 @@ #ifndef LLVM_ANALYSIS_CALLGRAPHSCCPASS_H #define LLVM_ANALYSIS_CALLGRAPHSCCPASS_H -#include "llvm/Analysis/CallGraph.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Pass.h" -#include "llvm/PassSupport.h" +#include <vector> namespace llvm { -class CallGraphNode; class CallGraph; -class PMStack; +class CallGraphNode; class CallGraphSCC; +class PMStack; class CallGraphSCCPass : public Pass { public: @@ -38,7 +38,7 @@ public: /// createPrinterPass - Get a pass that prints the Module /// corresponding to a CallGraph. - Pass *createPrinterPass(raw_ostream &O, + Pass *createPrinterPass(raw_ostream &OS, const std::string &Banner) const override; using llvm::Pass::doInitialization; @@ -57,7 +57,6 @@ public: /// /// SCC passes that add or delete functions to the SCC are required to update /// the SCC list, otherwise stale pointers may be dereferenced. - /// virtual bool runOnSCC(CallGraphSCC &SCC) = 0; /// doFinalization - This method is called after the SCC's of the program has @@ -89,7 +88,7 @@ protected: class CallGraphSCC { const CallGraph &CG; // The call graph for this SCC. void *Context; // The CGPassManager object that is vending this. - std::vector<CallGraphNode*> Nodes; + std::vector<CallGraphNode *> Nodes; public: CallGraphSCC(CallGraph &cg, void *context) : CG(cg), Context(context) {} @@ -105,7 +104,8 @@ public: /// Old node has been deleted, and New is to be used in its place. 
void ReplaceNode(CallGraphNode *Old, CallGraphNode *New); - typedef std::vector<CallGraphNode *>::const_iterator iterator; + using iterator = std::vector<CallGraphNode *>::const_iterator; + iterator begin() const { return Nodes.begin(); } iterator end() const { return Nodes.end(); } @@ -119,16 +119,19 @@ void initializeDummyCGSCCPassPass(PassRegistry &); class DummyCGSCCPass : public CallGraphSCCPass { public: static char ID; + DummyCGSCCPass() : CallGraphSCCPass(ID) { PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeDummyCGSCCPassPass(Registry); - }; + } + bool runOnSCC(CallGraphSCC &SCC) override { return false; } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_CALLGRAPHSCCPASS_H diff --git a/include/llvm/Analysis/CmpInstAnalysis.h b/include/llvm/Analysis/CmpInstAnalysis.h new file mode 100644 index 000000000000..3cc69d9fea29 --- /dev/null +++ b/include/llvm/Analysis/CmpInstAnalysis.h @@ -0,0 +1,72 @@ +//===-- CmpInstAnalysis.h - Utils to help fold compare insts ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file holds routines to help analyse compare instructions +// and fold them into constants or other compare instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CMPINSTANALYSIS_H +#define LLVM_ANALYSIS_CMPINSTANALYSIS_H + +#include "llvm/IR/InstrTypes.h" + +namespace llvm { + class ICmpInst; + class Value; + + /// Encode an icmp predicate into a three bit mask. These bits are carefully + /// arranged to allow folding of expressions such as: + /// + /// (A < B) | (A > B) --> (A != B) + /// + /// Note that this is only valid if the first and second predicates have the + /// same sign. It is illegal to do: (A u< B) | (A s> B) + /// + /// Three bits are used to represent the condition, as follows: + /// 0 A > B + /// 1 A == B + /// 2 A < B + /// + /// <=> Value Definition + /// 000 0 Always false + /// 001 1 A > B + /// 010 2 A == B + /// 011 3 A >= B + /// 100 4 A < B + /// 101 5 A != B + /// 110 6 A <= B + /// 111 7 Always true + /// + unsigned getICmpCode(const ICmpInst *ICI, bool InvertPred = false); + + /// This is the complement of getICmpCode, which turns an opcode and two + /// operands into either a constant true or false, or the predicate for a new + /// ICmp instruction. The sign is passed in to determine which kind of + /// predicate to use in the new icmp instruction. + /// A non-NULL return value will be a true or false constant. + /// A NULL return means a new ICmp is needed; its predicate is output + /// in NewICmpPred. + Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + CmpInst::Predicate &NewICmpPred); + + /// Return true if both predicates match sign or if at least one of them is an + /// equality comparison (which is signless). + bool PredicatesFoldable(CmpInst::Predicate p1, CmpInst::Predicate p2); + + /// Decompose an icmp into the form ((X & Mask) pred 0) if possible. The + /// returned predicate is either == or !=. Returns false if decomposition + /// fails.
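A worked instance of the table above: slt encodes as 4 (100) and sgt as 1 (001), so or'ing the codes of (A < B) and (A > B) yields 5 (101), i.e. A != B. A hedged sketch of how an instcombine-style client might drive these helpers; foldOrOfICmps is a hypothetical name:

    #include "llvm/Analysis/CmpInstAnalysis.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Try to fold (A pred1 B) | (A pred2 B) into one comparison or constant.
    Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Value *A, Value *B) {
      if (!PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate()))
        return nullptr; // Mixed signed/unsigned predicates cannot combine.
      unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
      bool IsSigned = LHS->isSigned() || RHS->isSigned();
      CmpInst::Predicate NewPred;
      if (Value *V = getICmpValue(IsSigned, Code, A, B, NewPred))
        return V; // Folded all the way to a constant true/false.
      // Otherwise the caller materializes the new compare, e.g.
      // new ICmpInst(NewPred, A, B) -- icmp ne for the example above.
      return nullptr;
    }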
+ bool decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate &Pred, + Value *&X, APInt &Mask, + bool LookThroughTrunc = true); + +} // end namespace llvm + +#endif diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h index 42034741b8e3..6d4eef412525 100644 --- a/include/llvm/Analysis/ConstantFolding.h +++ b/include/llvm/Analysis/ConstantFolding.h @@ -79,6 +79,12 @@ ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL); +/// \brief Attempt to constant fold a select instruction with the specified +/// operands. The constant result is returned if successful; if not, null is +/// returned. +Constant *ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, + Constant *V2); + /// \brief Attempt to constant fold a cast with the specified operand. If it /// fails, it returns a constant expression of the specified operand. Constant *ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, @@ -96,6 +102,13 @@ Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, Constant *ConstantFoldExtractValueInstruction(Constant *Agg, ArrayRef<unsigned> Idxs); +/// \brief Attempt to constant fold an insertelement instruction with the +/// specified operands and indices. The constant result is returned if +/// successful; if not, null is returned. +Constant *ConstantFoldInsertElementInstruction(Constant *Val, + Constant *Elt, + Constant *Idx); + /// \brief Attempt to constant fold an extractelement instruction with the /// specified operands and indices. The constant result is returned if /// successful; if not, null is returned. diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h index ca50ee2f829a..39f9c39c34e1 100644 --- a/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -30,7 +30,7 @@ struct DefaultAnalysisGraphTraits { template < typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, - typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> > + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> > class DOTGraphTraitsViewer : public FunctionPass { public: DOTGraphTraitsViewer(StringRef GraphName, char &ID) @@ -72,7 +72,7 @@ private: template < typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, - typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> > + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> > class DOTGraphTraitsPrinter : public FunctionPass { public: DOTGraphTraitsPrinter(StringRef GraphName, char &ID) @@ -124,7 +124,7 @@ private: template < typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, - typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> > + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> > class DOTGraphTraitsModuleViewer : public ModulePass { public: DOTGraphTraitsModuleViewer(StringRef GraphName, char &ID) @@ -150,7 +150,7 @@ private: template < typename AnalysisT, bool IsSimple, typename GraphT = AnalysisT *, - typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT> > + typename AnalysisGraphTraitsT = DefaultAnalysisGraphTraits<AnalysisT, GraphT> > class DOTGraphTraitsModulePrinter : public ModulePass { public: DOTGraphTraitsModulePrinter(StringRef GraphName, char &ID) diff --git 
a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h index e52c66f361c3..ab8668256ba2 100644 --- a/include/llvm/Analysis/DemandedBits.h +++ b/include/llvm/Analysis/DemandedBits.h @@ -1,4 +1,4 @@ -//===-- llvm/Analysis/DemandedBits.h - Determine demanded bits --*- C++ -*-===// +//===- llvm/Analysis/DemandedBits.h - Determine demanded bits ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,44 +24,45 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" namespace llvm { -class FunctionPass; +class AssumptionCache; +class DominatorTree; class Function; class Instruction; -class DominatorTree; -class AssumptionCache; struct KnownBits; +class raw_ostream; class DemandedBits { public: DemandedBits(Function &F, AssumptionCache &AC, DominatorTree &DT) : - F(F), AC(AC), DT(DT), Analyzed(false) {} + F(F), AC(AC), DT(DT) {} /// Return the bits demanded from instruction I. APInt getDemandedBits(Instruction *I); /// Return true if, during analysis, I could not be reached. bool isInstructionDead(Instruction *I); - + void print(raw_ostream &OS); private: - Function &F; - AssumptionCache &AC; - DominatorTree &DT; - void performAnalysis(); void determineLiveOperandBits(const Instruction *UserI, const Instruction *I, unsigned OperandNo, const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2); - bool Analyzed; + Function &F; + AssumptionCache &AC; + DominatorTree &DT; + + bool Analyzed = false; // The set of visited instructions (non-integer-typed only). SmallPtrSet<Instruction*, 32> Visited; @@ -71,16 +72,18 @@ private: class DemandedBitsWrapperPass : public FunctionPass { private: mutable Optional<DemandedBits> DB; + public: static char ID; // Pass identification, replacement for typeid + DemandedBitsWrapperPass(); bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override; - + /// Clean up memory in between runs void releaseMemory() override; - + DemandedBits &getDemandedBits() { return *DB; } void print(raw_ostream &OS, const Module *M) const override; @@ -89,11 +92,12 @@ public: /// An analysis that produces \c DemandedBits for a function. class DemandedBitsAnalysis : public AnalysisInfoMixin<DemandedBitsAnalysis> { friend AnalysisInfoMixin<DemandedBitsAnalysis>; + static AnalysisKey Key; public: - /// \brief Provide the result typedef for this analysis pass. - typedef DemandedBits Result; + /// \brief Provide the result type for this analysis pass. + using Result = DemandedBits; /// \brief Run the analysis pass over a function and produce demanded bits /// information. @@ -106,12 +110,13 @@ class DemandedBitsPrinterPass : public PassInfoMixin<DemandedBitsPrinterPass> { public: explicit DemandedBitsPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; /// Create a demanded bits analysis pass. 
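A hedged sketch of querying the DemandedBits object whose constructor is revised above (the analysis still runs lazily on the first query); AC and DT are assumed to come from the usual analysis passes:

    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/Analysis/DemandedBits.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/InstIterator.h"
    #include "llvm/Support/raw_ostream.h"

    void printDemanded(llvm::Function &F, llvm::AssumptionCache &AC,
                       llvm::DominatorTree &DT) {
      llvm::DemandedBits DB(F, AC, DT);
      for (llvm::Instruction &I : llvm::instructions(F))
        if (I.getType()->isIntegerTy())
          llvm::errs() << I << " demands 0x"
                       << DB.getDemandedBits(&I).toString(16, false) << "\n";
    }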
FunctionPass *createDemandedBitsWrapperPass(); -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_DEMANDED_BITS_H diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h index b566aeaf1fd6..a304dff18c79 100644 --- a/include/llvm/Analysis/DominanceFrontier.h +++ b/include/llvm/Analysis/DominanceFrontier.h @@ -18,40 +18,46 @@ #ifndef LLVM_ANALYSIS_DOMINANCEFRONTIER_H #define LLVM_ANALYSIS_DOMINANCEFRONTIER_H -#include "llvm/IR/Dominators.h" +#include "llvm/ADT/GraphTraits.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/GenericDomTree.h" +#include <cassert> #include <map> #include <set> +#include <utility> +#include <vector> namespace llvm { +class Function; +class raw_ostream; + //===----------------------------------------------------------------------===// /// DominanceFrontierBase - Common base class for computing forward and inverse /// dominance frontiers for a function. /// template <class BlockT, bool IsPostDom> class DominanceFrontierBase { - public: - typedef std::set<BlockT *> DomSetType; // Dom set for a bb - typedef std::map<BlockT *, DomSetType> DomSetMapType; // Dom set map +public: + using DomSetType = std::set<BlockT *>; // Dom set for a bb + using DomSetMapType = std::map<BlockT *, DomSetType>; // Dom set map protected: - typedef GraphTraits<BlockT *> BlockTraits; + using BlockTraits = GraphTraits<BlockT *>; DomSetMapType Frontiers; - std::vector<BlockT *> Roots; + // Postdominators can have multiple roots. + SmallVector<BlockT *, IsPostDom ? 4 : 1> Roots; static constexpr bool IsPostDominators = IsPostDom; - public: - DominanceFrontierBase() {} +public: + DominanceFrontierBase() = default; /// getRoots - Return the root blocks of the current CFG. This may include /// multiple blocks if we are computing post dominators. For forward /// dominators, this will always be a single block (the entry node). 
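The getRoots() comment above documents the single-root invariant for forward frontiers; a sketch of computing a function's frontier with the analyze() entry point that follows:

    #include "llvm/Analysis/DominanceFrontier.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/Support/raw_ostream.h"

    void printFrontiers(llvm::Function &F) {
      llvm::DominatorTree DT(F); // Forward dominators: one root, the entry.
      llvm::DominanceFrontier DF;
      DF.analyze(DT);
      DF.print(llvm::errs());
    }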
- /// - inline const std::vector<BlockT *> &getRoots() const { - return Roots; - } + const SmallVectorImpl<BlockT *> &getRoots() const { return Roots; } BlockT *getRoot() const { assert(Roots.size() == 1 && "Should always have entry node!"); @@ -59,7 +65,6 @@ protected: } /// isPostDominator - Returns true if analysis based on postdoms - /// bool isPostDominator() const { return IsPostDominators; } @@ -69,8 +74,9 @@ protected: } // Accessor interface: - typedef typename DomSetMapType::iterator iterator; - typedef typename DomSetMapType::const_iterator const_iterator; + using iterator = typename DomSetMapType::iterator; + using const_iterator = typename DomSetMapType::const_iterator; + iterator begin() { return Frontiers.begin(); } const_iterator begin() const { return Frontiers.begin(); } iterator end() { return Frontiers.end(); } @@ -115,19 +121,19 @@ protected: template <class BlockT> class ForwardDominanceFrontierBase : public DominanceFrontierBase<BlockT, false> { - private: - typedef GraphTraits<BlockT *> BlockTraits; +private: + using BlockTraits = GraphTraits<BlockT *>; public: - typedef DomTreeBase<BlockT> DomTreeT; - typedef DomTreeNodeBase<BlockT> DomTreeNodeT; - typedef typename DominanceFrontierBase<BlockT, false>::DomSetType DomSetType; - - void analyze(DomTreeT &DT) { - this->Roots = DT.getRoots(); - assert(this->Roots.size() == 1 && - "Only one entry block for forward domfronts!"); - calculate(DT, DT[this->Roots[0]]); + using DomTreeT = DomTreeBase<BlockT>; + using DomTreeNodeT = DomTreeNodeBase<BlockT>; + using DomSetType = typename DominanceFrontierBase<BlockT, false>::DomSetType; + + void analyze(DomTreeT &DT) { + assert(DT.getRoots().size() == 1 && + "Only one entry block for forward domfronts!"); + this->Roots = {DT.getRoot()}; + calculate(DT, DT[this->Roots[0]]); } const DomSetType &calculate(const DomTreeT &DT, const DomTreeNodeT *Node); @@ -135,20 +141,21 @@ public: class DominanceFrontier : public ForwardDominanceFrontierBase<BasicBlock> { public: - typedef DomTreeBase<BasicBlock> DomTreeT; - typedef DomTreeNodeBase<BasicBlock> DomTreeNodeT; - typedef DominanceFrontierBase<BasicBlock, false>::DomSetType DomSetType; - typedef DominanceFrontierBase<BasicBlock, false>::iterator iterator; - typedef DominanceFrontierBase<BasicBlock, false>::const_iterator - const_iterator; - - /// Handle invalidation explicitly. - bool invalidate(Function &F, const PreservedAnalyses &PA, - FunctionAnalysisManager::Invalidator &); + using DomTreeT = DomTreeBase<BasicBlock>; + using DomTreeNodeT = DomTreeNodeBase<BasicBlock>; + using DomSetType = DominanceFrontierBase<BasicBlock, false>::DomSetType; + using iterator = DominanceFrontierBase<BasicBlock, false>::iterator; + using const_iterator = + DominanceFrontierBase<BasicBlock, false>::const_iterator; + + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); }; class DominanceFrontierWrapperPass : public FunctionPass { DominanceFrontier DF; + public: static char ID; // Pass ID, replacement for typeid @@ -176,11 +183,12 @@ extern template class ForwardDominanceFrontierBase<BasicBlock>; class DominanceFrontierAnalysis : public AnalysisInfoMixin<DominanceFrontierAnalysis> { friend AnalysisInfoMixin<DominanceFrontierAnalysis>; + static AnalysisKey Key; public: - /// \brief Provide the result typedef for this analysis pass. - typedef DominanceFrontier Result; + /// \brief Provide the result type for this analysis pass.
+ using Result = DominanceFrontier; /// \brief Run the analysis pass over a function and produce the dominance frontier. DominanceFrontier run(Function &F, FunctionAnalysisManager &AM); @@ -193,9 +201,10 @@ class DominanceFrontierPrinterPass public: explicit DominanceFrontierPrinterPass(raw_ostream &OS); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_DOMINANCEFRONTIER_H diff --git a/include/llvm/Analysis/DominanceFrontierImpl.h b/include/llvm/Analysis/DominanceFrontierImpl.h index 5093b975e709..dffb2e02b621 100644 --- a/include/llvm/Analysis/DominanceFrontierImpl.h +++ b/include/llvm/Analysis/DominanceFrontierImpl.h @@ -18,21 +18,28 @@ #ifndef LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H #define LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H +#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <set> +#include <utility> +#include <vector> namespace llvm { template <class BlockT> class DFCalculateWorkObject { public: - typedef DomTreeNodeBase<BlockT> DomTreeNodeT; + using DomTreeNodeT = DomTreeNodeBase<BlockT>; DFCalculateWorkObject(BlockT *B, BlockT *P, const DomTreeNodeT *N, const DomTreeNodeT *PN) : currentBB(B), parentBB(P), Node(N), parentNode(PN) {} + BlockT *currentBB; BlockT *parentBB; const DomTreeNodeT *Node; @@ -219,6 +226,6 @@ ForwardDominanceFrontierBase<BlockT>::calculate(const DomTreeT &DT, return *Result; } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_DOMINANCEFRONTIERIMPL_H diff --git a/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h index 007e4d8602fa..8b1c10139de8 100644 --- a/include/llvm/Analysis/IndirectCallPromotionAnalysis.h +++ b/include/llvm/Analysis/IndirectCallPromotionAnalysis.h @@ -27,10 +27,12 @@ private: // Allocate space to read the profile annotation. std::unique_ptr<InstrProfValueData[]> ValueDataArray; - // Count is the call count for the direct-call target and - // TotalCount is the call count for the indirect-call callsite. + // Count is the call count for the direct-call target. + // TotalCount is the total call count for the indirect-call callsite. + // RemainingCount is TotalCount minus the count already promoted to direct calls. // Return true if we should promote this indirect-call target. - bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount); + bool isPromotionProfitable(uint64_t Count, uint64_t TotalCount, + uint64_t RemainingCount); // Returns the number of profitable candidates to promote for the // current ValueDataArray and the given \p Inst. diff --git a/include/llvm/Analysis/IndirectCallSiteVisitor.h b/include/llvm/Analysis/IndirectCallSiteVisitor.h index 3c40cc0235cc..dde56a143c51 100644 --- a/include/llvm/Analysis/IndirectCallSiteVisitor.h +++ b/include/llvm/Analysis/IndirectCallSiteVisitor.h @@ -27,7 +27,7 @@ struct PGOIndirectCallSiteVisitor }; // Helper function that finds all indirect call sites.
-static inline std::vector<Instruction *> findIndirectCallSites(Function &F) { +inline std::vector<Instruction *> findIndirectCallSites(Function &F) { PGOIndirectCallSiteVisitor ICV; ICV.visit(F); return ICV.IndirectCallInsts; diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index f33a2de5a5f4..985f3880ed3a 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -16,6 +16,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include <cassert> #include <climits> @@ -105,6 +106,12 @@ public: return Cost; } + /// \brief Get the threshold against which the cost was computed + int getThreshold() const { + assert(isVariable() && "Invalid access of InlineCost"); + return Threshold; + } + /// \brief Get the cost delta from the threshold for inlining. /// Only valid if the cost is of the variable kind. Returns a negative /// value if the cost is too high to inline. @@ -139,8 +146,15 @@ struct InlineParams { /// Threshold to use when the callsite is considered hot. Optional<int> HotCallSiteThreshold; + /// Threshold to use when the callsite is considered hot relative to function + /// entry. + Optional<int> LocallyHotCallSiteThreshold; + /// Threshold to use when the callsite is considered cold. Optional<int> ColdCallSiteThreshold; + + /// Compute inline cost even when the cost has exceeded the threshold. + Optional<bool> ComputeFullInlineCost; }; /// Generate the parameters to tune the inline cost analysis based only on the @@ -175,12 +189,11 @@ int getCallsiteCost(CallSite CS, const DataLayout &DL); /// /// Also note that calling this function *dynamically* computes the cost of /// inlining the callsite. It is an expensive, heavyweight call. -InlineCost -getInlineCost(CallSite CS, const InlineParams &Params, - TargetTransformInfo &CalleeTTI, - std::function<AssumptionCache &(Function &)> &GetAssumptionCache, - Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, - ProfileSummaryInfo *PSI); +InlineCost getInlineCost( + CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, + std::function<AssumptionCache &(Function &)> &GetAssumptionCache, + Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr); /// \brief Get an InlineCost with the callee explicitly specified. /// This allows you to calculate the cost of inlining a function via a @@ -192,7 +205,7 @@ getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, - ProfileSummaryInfo *PSI); + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE); /// \brief Minimal filter to detect invalid constructs for inlining. bool isInlineViable(Function &Callee); diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index be0f32ef444a..3932a2ec2498 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -161,6 +161,10 @@ Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const SimplifyQuery &Q); +/// Given operands for an InsertElement, fold the result or return null. 
+Value *SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, + const SimplifyQuery &Q); + /// Given operands for an ExtractValueInst, fold the result or return null. Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, const SimplifyQuery &Q); diff --git a/include/llvm/Analysis/Interval.h b/include/llvm/Analysis/Interval.h index a63a004043cc..f3714dddedd5 100644 --- a/include/llvm/Analysis/Interval.h +++ b/include/llvm/Analysis/Interval.h @@ -39,10 +39,11 @@ class Interval { /// interval. Also, any loops in this interval must go through the HeaderNode. /// BasicBlock *HeaderNode; + public: - typedef std::vector<BasicBlock*>::iterator succ_iterator; - typedef std::vector<BasicBlock*>::iterator pred_iterator; - typedef std::vector<BasicBlock*>::iterator node_iterator; + using succ_iterator = std::vector<BasicBlock*>::iterator; + using pred_iterator = std::vector<BasicBlock*>::iterator; + using node_iterator = std::vector<BasicBlock*>::iterator; inline Interval(BasicBlock *Header) : HeaderNode(Header) { Nodes.push_back(Header); @@ -51,18 +52,15 @@ public: inline BasicBlock *getHeaderNode() const { return HeaderNode; } /// Nodes - The basic blocks in this interval. - /// std::vector<BasicBlock*> Nodes; /// Successors - List of BasicBlocks that are reachable directly from nodes in /// this interval, but are not in the interval themselves. /// These nodes necessarily must be header nodes for other intervals. - /// std::vector<BasicBlock*> Successors; /// Predecessors - List of BasicBlocks that have this Interval's header block /// as one of their successors. - /// std::vector<BasicBlock*> Predecessors; /// contains - Find out if a basic block is in this interval @@ -88,7 +86,6 @@ public: /// Equality operator. It is only valid to compare two intervals from the /// same partition, because of this, all we have to check is the header node /// for equality. 
- /// inline bool operator==(const Interval &I) const { return HeaderNode == I.HeaderNode; } @@ -121,8 +118,8 @@ inline Interval::pred_iterator pred_end(Interval *I) { } template <> struct GraphTraits<Interval*> { - typedef Interval *NodeRef; - typedef Interval::succ_iterator ChildIteratorType; + using NodeRef = Interval *; + using ChildIteratorType = Interval::succ_iterator; static NodeRef getEntryNode(Interval *I) { return I; } @@ -131,14 +128,15 @@ template <> struct GraphTraits<Interval*> { static ChildIteratorType child_end(NodeRef N) { return succ_end(N); } }; -template <> struct GraphTraits<Inverse<Interval*> > { - typedef Interval *NodeRef; - typedef Interval::pred_iterator ChildIteratorType; +template <> struct GraphTraits<Inverse<Interval*>> { + using NodeRef = Interval *; + using ChildIteratorType = Interval::pred_iterator; + static NodeRef getEntryNode(Inverse<Interval *> G) { return G.Graph; } static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); } static ChildIteratorType child_end(NodeRef N) { return pred_end(N); } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_INTERVAL_H diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h index 655ce2dab413..6ffcae592e98 100644 --- a/include/llvm/Analysis/IntervalIterator.h +++ b/include/llvm/Analysis/IntervalIterator.h @@ -33,26 +33,32 @@ #ifndef LLVM_ANALYSIS_INTERVALITERATOR_H #define LLVM_ANALYSIS_INTERVALITERATOR_H +#include "llvm/ADT/GraphTraits.h" +#include "llvm/Analysis/Interval.h" #include "llvm/Analysis/IntervalPartition.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Function.h" +#include "llvm/Support/ErrorHandling.h" #include <algorithm> +#include <cassert> +#include <iterator> #include <set> +#include <utility> #include <vector> namespace llvm { +class BasicBlock; + // getNodeHeader - Given a source graph node and the source graph, return the // BasicBlock that is the header node. This is the opposite of // getSourceGraphNode. -// inline BasicBlock *getNodeHeader(BasicBlock *BB) { return BB; } inline BasicBlock *getNodeHeader(Interval *I) { return I->getHeaderNode(); } // getSourceGraphNode - Given a BasicBlock and the source graph, return the // source graph node that corresponds to the BasicBlock. This is the opposite // of getNodeHeader. -// inline BasicBlock *getSourceGraphNode(Function *, BasicBlock *BB) { return BB; } @@ -64,7 +70,6 @@ inline Interval *getSourceGraphNode(IntervalPartition *IP, BasicBlock *BB) { // with the task of adding a node to the new interval, depending on the // type of the source node. In the case of a CFG source graph (BasicBlock // case), the BasicBlock itself is added to the interval. -// inline void addNodeToInterval(Interval *Int, BasicBlock *BB) { Int->Nodes.push_back(BB); } @@ -75,28 +80,25 @@ inline void addNodeToInterval(Interval *Int, BasicBlock *BB) { // case), the BasicBlock itself is added to the interval. In the case of // an IntervalPartition source graph (Interval case), all of the member // BasicBlocks are added to the interval. -// inline void addNodeToInterval(Interval *Int, Interval *I) { // Add all of the nodes in I as new nodes in Int. 
Int->Nodes.insert(Int->Nodes.end(), I->Nodes.begin(), I->Nodes.end()); } - - - - -template<class NodeTy, class OrigContainer_t, class GT = GraphTraits<NodeTy*>, - class IGT = GraphTraits<Inverse<NodeTy*> > > +template<class NodeTy, class OrigContainer_t, class GT = GraphTraits<NodeTy *>, + class IGT = GraphTraits<Inverse<NodeTy *>>> class IntervalIterator { - std::vector<std::pair<Interval*, typename Interval::succ_iterator> > IntStack; - std::set<BasicBlock*> Visited; + std::vector<std::pair<Interval *, typename Interval::succ_iterator>> IntStack; + std::set<BasicBlock *> Visited; OrigContainer_t *OrigContainer; bool IOwnMem; // If True, delete intervals when done with them // See file header for conditions of use + public: - typedef std::forward_iterator_tag iterator_category; + using iterator_category = std::forward_iterator_tag; + + IntervalIterator() = default; // End iterator, empty stack - IntervalIterator() {} // End iterator, empty stack IntervalIterator(Function *M, bool OwnMemory) : IOwnMem(OwnMemory) { OrigContainer = M; if (!ProcessInterval(&M->front())) { @@ -157,6 +159,7 @@ public: return *this; } + IntervalIterator operator++(int) { // Postincrement IntervalIterator tmp = *this; ++*this; @@ -171,7 +174,6 @@ private: // // This method is templated because it may operate on two different source // graphs: a basic block graph, or a preexisting interval graph. - // bool ProcessInterval(NodeTy *Node) { BasicBlock *Header = getNodeHeader(Node); if (!Visited.insert(Header).second) @@ -196,7 +198,6 @@ private: // // This method is templated because it may operate on two different source // graphs: a basic block graph, or a preexisting interval graph. - // void ProcessNode(Interval *Int, NodeTy *Node) { assert(Int && "Null interval == bad!"); assert(Node && "Null Node == bad!"); @@ -241,10 +242,9 @@ private: } }; -typedef IntervalIterator<BasicBlock, Function> function_interval_iterator; -typedef IntervalIterator<Interval, IntervalPartition> - interval_part_interval_iterator; - +using function_interval_iterator = IntervalIterator<BasicBlock, Function>; +using interval_part_interval_iterator = + IntervalIterator<Interval, IntervalPartition>; inline function_interval_iterator intervals_begin(Function *F, bool DeleteInts = true) { @@ -263,6 +263,6 @@ inline interval_part_interval_iterator intervals_end(IntervalPartition &IP) { return interval_part_interval_iterator(); } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_INTERVALITERATOR_H diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h index 274be2bdcfa9..50335165711f 100644 --- a/include/llvm/Analysis/IntervalPartition.h +++ b/include/llvm/Analysis/IntervalPartition.h @@ -23,12 +23,15 @@ #ifndef LLVM_ANALYSIS_INTERVALPARTITION_H #define LLVM_ANALYSIS_INTERVALPARTITION_H -#include "llvm/Analysis/Interval.h" #include "llvm/Pass.h" #include <map> +#include <vector> namespace llvm { +class BasicBlock; +class Interval; + //===----------------------------------------------------------------------===// // // IntervalPartition - This class builds and holds an "interval partition" for @@ -38,17 +41,17 @@ namespace llvm { // nodes following it. 
// class IntervalPartition : public FunctionPass { - typedef std::map<BasicBlock*, Interval*> IntervalMapTy; + using IntervalMapTy = std::map<BasicBlock *, Interval *>; IntervalMapTy IntervalMap; - typedef std::vector<Interval*> IntervalListTy; - Interval *RootInterval; - std::vector<Interval*> Intervals; + using IntervalListTy = std::vector<Interval *>; + Interval *RootInterval = nullptr; + std::vector<Interval *> Intervals; public: static char ID; // Pass identification, replacement for typeid - IntervalPartition() : FunctionPass(ID), RootInterval(nullptr) { + IntervalPartition() : FunctionPass(ID) { initializeIntervalPartitionPass(*PassRegistry::getPassRegistry()); } @@ -58,7 +61,6 @@ public: // IntervalPartition ctor - Build a reduced interval partition from an // existing interval graph. This takes an additional boolean parameter to // distinguish it from a copy constructor. Always pass in false for now. - // IntervalPartition(IntervalPartition &I, bool); // print - Show contents in human readable format... @@ -95,17 +97,15 @@ private: // addIntervalToPartition - Add an interval to the internal list of intervals, // and then add mappings from all of the basic blocks in the interval to the // interval itself (in the IntervalMap). - // void addIntervalToPartition(Interval *I); // updatePredecessors - Interval generation only sets the successor fields of // the interval data structures. After interval generation is complete, // run through all of the intervals and propagate successor info as // predecessor info. - // void updatePredecessors(Interval *Int); }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_INTERVALPARTITION_H diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index a025f2275fb4..d1ec6a9dcc55 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -35,28 +35,33 @@ #ifndef LLVM_ANALYSIS_LAZYCALLGRAPH_H #define LLVM_ANALYSIS_LAZYCALLGRAPH_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> #include <iterator> +#include <string> #include <utility> namespace llvm { -class PreservedAnalyses; -class raw_ostream; + +class Module; +class Value; /// A lazily constructed view of the call graph of a module. /// @@ -183,8 +188,8 @@ public: friend class LazyCallGraph::Node; friend class LazyCallGraph::RefSCC; - typedef SmallVector<Edge, 4> VectorT; - typedef SmallVectorImpl<Edge> VectorImplT; + using VectorT = SmallVector<Edge, 4>; + using VectorImplT = SmallVectorImpl<Edge>; public: /// An iterator used for the edges to both entry nodes and child nodes. 
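As a usage sketch for the interval iterators declared in IntervalIterator.h above: the snippet below walks the maximal intervals of a function and prints their headers. This example is illustrative only and not part of the patch; the helper name printIntervals is made up here.

  #include "llvm/Analysis/IntervalIterator.h"
  #include "llvm/IR/Function.h"
  #include "llvm/Support/raw_ostream.h"

  // Walk the maximal intervals of F in the order the iterator discovers
  // them. DeleteInts=true (the default) makes the iterator own, and free,
  // the Interval objects it builds as it goes.
  static void printIntervals(llvm::Function &F) {
    using namespace llvm;
    for (function_interval_iterator I = intervals_begin(&F),
                                    E = intervals_end(&F);
         I != E; ++I) {
      const Interval &Int = *I;
      errs() << "interval with header " << Int.getHeaderNode()->getName()
             << " covering " << Int.Nodes.size() << " block(s)\n";
    }
  }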
@@ -204,7 +209,7 @@ public: } public: - iterator() {} + iterator() = default; using iterator_adaptor_base::operator++; iterator &operator++() { @@ -240,7 +245,7 @@ public: } public: - call_iterator() {} + call_iterator() = default; using iterator_adaptor_base::operator++; call_iterator &operator++() { @@ -256,11 +261,17 @@ public: Edge &operator[](int i) { return Edges[i]; } Edge &operator[](Node &N) { assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!"); - return Edges[EdgeIndexMap.find(&N)->second]; + auto &E = Edges[EdgeIndexMap.find(&N)->second]; + assert(E && "Dead or null edge!"); + return E; } + Edge *lookup(Node &N) { auto EI = EdgeIndexMap.find(&N); - return EI != EdgeIndexMap.end() ? &Edges[EI->second] : nullptr; + if (EI == EdgeIndexMap.end()) + return nullptr; + auto &E = Edges[EI->second]; + return E ? &E : nullptr; } call_iterator call_begin() { @@ -329,7 +340,18 @@ public: bool operator!=(const Node &N) const { return !operator==(N); } /// Tests whether the node has been populated with edges. - operator bool() const { return Edges.hasValue(); } + bool isPopulated() const { return Edges.hasValue(); } + + /// Tests whether this is actually a dead node and no longer valid. + /// + /// Users rarely interact with nodes in this state and other methods are + /// invalid. This is used to model a node in an edge list where the + /// function has been completely removed. + bool isDead() const { + assert(!G == !F && + "Both graph and function pointers should be null or non-null."); + return !G; + } // We allow accessing the edges by dereferencing or using the arrow // operator, essentially wrapping the internal optional. @@ -365,15 +387,14 @@ public: // We provide for the DFS numbering and Tarjan walk lowlink numbers to be // stored directly within the node. These are both '-1' when nodes are part // of an SCC (or RefSCC), or '0' when not yet reached in a DFS walk. - int DFSNumber; - int LowLink; + int DFSNumber = 0; + int LowLink = 0; Optional<EdgeSequence> Edges; /// Basic constructor implements the scanning of F into Edges and /// EdgeIndexMap. - Node(LazyCallGraph &G, Function &F) - : G(&G), F(&F), DFSNumber(0), LowLink(0) {} + Node(LazyCallGraph &G, Function &F) : G(&G), F(&F) {} /// Implementation of the scan when populating. EdgeSequence &populateSlow(); @@ -462,7 +483,7 @@ public: #endif public: - typedef pointee_iterator<SmallVectorImpl<Node *>::const_iterator> iterator; + using iterator = pointee_iterator<SmallVectorImpl<Node *>::const_iterator>; iterator begin() const { return Nodes.begin(); } iterator end() const { return Nodes.end(); } @@ -528,7 +549,6 @@ public: friend class LazyCallGraph::Node; LazyCallGraph *G; - SmallPtrSet<RefSCC *, 1> Parents; /// A postorder list of the inner SCCs. 
    SmallVector<SCC *, 4> SCCs;
@@ -541,7 +561,6 @@ public:
     RefSCC(LazyCallGraph &G);
 
     void clear() {
-      Parents.clear();
       SCCs.clear();
       SCCIndices.clear();
     }
@@ -592,10 +611,10 @@ public:
     void handleTrivialEdgeInsertion(Node &SourceN, Node &TargetN);
 
   public:
-    typedef pointee_iterator<SmallVectorImpl<SCC *>::const_iterator> iterator;
-    typedef iterator_range<iterator> range;
-    typedef pointee_iterator<SmallPtrSetImpl<RefSCC *>::const_iterator>
-        parent_iterator;
+    using iterator = pointee_iterator<SmallVectorImpl<SCC *>::const_iterator>;
+    using range = iterator_range<iterator>;
+    using parent_iterator =
+        pointee_iterator<SmallPtrSetImpl<RefSCC *>::const_iterator>;
 
     iterator begin() const { return SCCs.begin(); }
     iterator end() const { return SCCs.end(); }
@@ -608,27 +627,34 @@ public:
       return SCCs.begin() + SCCIndices.find(&C)->second;
     }
 
-    parent_iterator parent_begin() const { return Parents.begin(); }
-    parent_iterator parent_end() const { return Parents.end(); }
-
-    iterator_range<parent_iterator> parents() const {
-      return make_range(parent_begin(), parent_end());
-    }
+    /// Test if this RefSCC is a parent of \a RC.
+    ///
+    /// CAUTION: This method walks every edge in the \c RefSCC; it can be very
+    /// expensive.
+    bool isParentOf(const RefSCC &RC) const;
 
-    /// Test if this RefSCC is a parent of \a C.
-    bool isParentOf(const RefSCC &C) const { return C.isChildOf(*this); }
+    /// Test if this RefSCC is an ancestor of \a RC.
+    ///
+    /// CAUTION: This method walks the directed graph of edges as far as
+    /// necessary to find a possible path to the argument. In the worst case
+    /// this may walk the entire graph and can be extremely expensive.
+    bool isAncestorOf(const RefSCC &RC) const;
 
-    /// Test if this RefSCC is an ancestor of \a C.
-    bool isAncestorOf(const RefSCC &C) const { return C.isDescendantOf(*this); }
+    /// Test if this RefSCC is a child of \a RC.
+    ///
+    /// CAUTION: This method walks every edge in the argument \c RefSCC; it can
+    /// be very expensive.
+    bool isChildOf(const RefSCC &RC) const { return RC.isParentOf(*this); }
 
-    /// Test if this RefSCC is a child of \a C.
-    bool isChildOf(const RefSCC &C) const {
-      return Parents.count(const_cast<RefSCC *>(&C));
+    /// Test if this RefSCC is a descendant of \a RC.
+    ///
+    /// CAUTION: This method walks the directed graph of edges as far as
+    /// necessary to find a possible path from the argument. In the worst case
+    /// this may walk the entire graph and can be extremely expensive.
+    bool isDescendantOf(const RefSCC &RC) const {
+      return RC.isAncestorOf(*this);
     }
 
-    /// Test if this RefSCC is a descendant of \a C.
-    bool isDescendantOf(const RefSCC &C) const;
-
    /// Provide a short name by printing this RefSCC to a std::string.
    ///
    /// This copes with the fact that we don't have a name per-se for an RefSCC
@@ -774,26 +800,25 @@ public:
     /// though, so be careful calling this while iterating over them.
     void removeOutgoingEdge(Node &SourceN, Node &TargetN);
 
-    /// Remove a ref edge which is entirely within this RefSCC.
+    /// Remove a list of ref edges which are entirely within this RefSCC.
     ///
-    /// Both the \a SourceN and the \a TargetN must be within this RefSCC.
-    /// Removing such an edge may break cycles that form this RefSCC and thus
-    /// this operation may change the RefSCC graph significantly. In
+    /// Both the \a SourceN and all of the \a TargetNs must be within this
+    /// RefSCC. Removing these edges may break cycles that form this RefSCC and
+    /// thus this operation may change the RefSCC graph significantly.
In /// particular, this operation will re-form new RefSCCs based on the /// remaining connectivity of the graph. The following invariants are /// guaranteed to hold after calling this method: /// - /// 1) This RefSCC is still a RefSCC in the graph. - /// 2) This RefSCC will be the parent of any new RefSCCs. Thus, this RefSCC - /// is preserved as the root of any new RefSCC DAG formed. - /// 3) No RefSCC other than this RefSCC has its member set changed (this is + /// 1) If a ref-cycle remains after removal, it leaves this RefSCC intact + /// and in the graph. No new RefSCCs are built. + /// 2) Otherwise, this RefSCC will be dead after this call and no longer in + /// the graph or the postorder traversal of the call graph. Any iterator + /// pointing at this RefSCC will become invalid. + /// 3) All newly formed RefSCCs will be returned and the order of the + /// RefSCCs returned will be a valid postorder traversal of the new + /// RefSCCs. + /// 4) No RefSCC other than this RefSCC has its member set changed (this is /// inherent in the definition of removing such an edge). - /// 4) All of the parent links of the RefSCC graph will be updated to - /// reflect the new RefSCC structure. - /// 5) All RefSCCs formed out of this RefSCC, excluding this RefSCC, will - /// be returned in post-order. - /// 6) The order of the RefSCCs in the vector will be a valid postorder - /// traversal of the new RefSCCs. /// /// These invariants are very important to ensure that we can build /// optimization pipelines on top of the CGSCC pass manager which @@ -812,11 +837,9 @@ public: /// within this RefSCC and edges from this RefSCC to child RefSCCs. Some /// effort has been made to minimize the overhead of common cases such as /// self-edges and edge removals which result in a spanning tree with no - /// more cycles. There are also detailed comments within the implementation - /// on techniques which could substantially improve this routine's - /// efficiency. + /// more cycles. SmallVector<RefSCC *, 1> removeInternalRefEdge(Node &SourceN, - Node &TargetN); + ArrayRef<Node *> TargetNs); /// A convenience wrapper around the above to handle trivial cases of /// inserting a new call edge. @@ -870,14 +893,13 @@ public: struct IsAtEndT {}; LazyCallGraph *G; - RefSCC *RC; + RefSCC *RC = nullptr; /// Build the begin iterator for a node. postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G), RC(getRC(G, 0)) {} /// Build the end iterator for a node. This is selected purely by overload. - postorder_ref_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/) - : G(&G), RC(nullptr) {} + postorder_ref_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/) : G(&G) {} /// Get the post-order RefSCC at the given index of the postorder walk, /// populating it if necessary. @@ -1079,8 +1101,8 @@ public: ///@} private: - typedef SmallVectorImpl<Node *>::reverse_iterator node_stack_iterator; - typedef iterator_range<node_stack_iterator> node_stack_range; + using node_stack_iterator = SmallVectorImpl<Node *>::reverse_iterator; + using node_stack_range = iterator_range<node_stack_iterator>; /// Allocator that holds all the call graph nodes. SpecificBumpPtrAllocator<Node> BPA; @@ -1112,11 +1134,6 @@ private: /// RefSCCs. DenseMap<RefSCC *, int> RefSCCIndices; - /// The leaf RefSCCs of the graph. - /// - /// These are all of the RefSCCs which have no children. 
- SmallVector<RefSCC *, 4> LeafRefSCCs; - /// Defined functions that are also known library functions which the /// optimizer can reason about and therefore might introduce calls to out of /// thin air. @@ -1163,12 +1180,6 @@ private: /// Build the SCCs for a RefSCC out of a list of nodes. void buildSCCs(RefSCC &RC, node_stack_range Nodes); - /// Connect a RefSCC into the larger graph. - /// - /// This walks the edges to connect the RefSCC to its children's parent set, - /// and updates the root leaf list. - void connectRefSCC(RefSCC &RC); - /// Get the index of a RefSCC within the postorder traversal. /// /// Requires that this RefSCC is a valid one in the (perhaps partial) @@ -1185,7 +1196,9 @@ private: inline LazyCallGraph::Edge::Edge() : Value() {} inline LazyCallGraph::Edge::Edge(Node &N, Kind K) : Value(&N, K) {} -inline LazyCallGraph::Edge::operator bool() const { return Value.getPointer(); } +inline LazyCallGraph::Edge::operator bool() const { + return Value.getPointer() && !Value.getPointer()->isDead(); +} inline LazyCallGraph::Edge::Kind LazyCallGraph::Edge::getKind() const { assert(*this && "Queried a null edge!"); @@ -1209,16 +1222,16 @@ inline Function &LazyCallGraph::Edge::getFunction() const { // Provide GraphTraits specializations for call graphs. template <> struct GraphTraits<LazyCallGraph::Node *> { - typedef LazyCallGraph::Node *NodeRef; - typedef LazyCallGraph::EdgeSequence::iterator ChildIteratorType; + using NodeRef = LazyCallGraph::Node *; + using ChildIteratorType = LazyCallGraph::EdgeSequence::iterator; static NodeRef getEntryNode(NodeRef N) { return N; } static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); } static ChildIteratorType child_end(NodeRef N) { return (*N)->end(); } }; template <> struct GraphTraits<LazyCallGraph *> { - typedef LazyCallGraph::Node *NodeRef; - typedef LazyCallGraph::EdgeSequence::iterator ChildIteratorType; + using NodeRef = LazyCallGraph::Node *; + using ChildIteratorType = LazyCallGraph::EdgeSequence::iterator; static NodeRef getEntryNode(NodeRef N) { return N; } static ChildIteratorType child_begin(NodeRef N) { return (*N)->begin(); } @@ -1228,11 +1241,12 @@ template <> struct GraphTraits<LazyCallGraph *> { /// An analysis pass which computes the call graph for a module. class LazyCallGraphAnalysis : public AnalysisInfoMixin<LazyCallGraphAnalysis> { friend AnalysisInfoMixin<LazyCallGraphAnalysis>; + static AnalysisKey Key; public: /// Inform generic clients of the result type. - typedef LazyCallGraph Result; + using Result = LazyCallGraph; /// Compute the \c LazyCallGraph for the module \c M. /// @@ -1268,6 +1282,7 @@ public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_ANALYSIS_LAZYCALLGRAPH_H diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h index 2568903c57f3..54f151ef82e2 100644 --- a/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/include/llvm/Analysis/LoopAccessAnalysis.h @@ -163,7 +163,7 @@ public: }; MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) - : PSE(PSE), InnermostLoop(L), AccessIdx(0), + : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeRegisterWidth(-1U), ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true), RecordDependences(true) {} @@ -199,6 +199,10 @@ public: /// the accesses safely with. 
uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
 
+  /// \brief Return the number of elements that are safe to operate on
+  /// simultaneously, multiplied by the size of the element in bits.
+  uint64_t getMaxSafeRegisterWidth() const { return MaxSafeRegisterWidth; }
+
   /// \brief In some cases when the dependency check fails we can still
   /// vectorize the loop with a dynamic array access check.
   bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
@@ -255,6 +259,12 @@ private:
   // We can access this many bytes in parallel safely.
   uint64_t MaxSafeDepDistBytes;
 
+  /// \brief Number of elements (from consecutive iterations) that are safe to
+  /// operate on simultaneously, multiplied by the size of the element in bits.
+  /// The size of the element is taken from the memory access that is most
+  /// restrictive.
+  uint64_t MaxSafeRegisterWidth;
+
   /// \brief If we see a non-constant dependence distance we can still try to
   /// vectorize this loop with runtime checks.
   bool ShouldRetryWithRuntimeCheck;
@@ -657,6 +667,21 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
                      const ValueToValueMap &StridesMap = ValueToValueMap(),
                      bool Assume = false, bool ShouldCheckWrap = true);
 
+/// \brief Attempt to sort the 'loads' in \p VL and return the sorted values in
+/// \p Sorted.
+///
+/// Returns 'false' if sorting is not legal or feasible; otherwise returns
+/// 'true'. If \p Mask is not null, it also returns the \p Mask which is the
+/// shuffle mask for actual memory access order.
+///
+/// For example, for a given VL of memory accesses in program order, a[i+2],
+/// a[i+0], a[i+1] and a[i+3], this function will sort the VL and save the
+/// sorted value in 'Sorted' as a[i+0], a[i+1], a[i+2], a[i+3] and saves the
+/// mask for actual memory accesses in program order in 'Mask' as <2,0,1,3>.
+bool sortLoadAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
+                      ScalarEvolution &SE, SmallVectorImpl<Value *> &Sorted,
+                      SmallVectorImpl<unsigned> *Mask = nullptr);
+
 /// \brief Returns true if the memory operations \p A and \p B are consecutive.
 /// This is a simple API that does not depend on the analysis pass.
 bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
diff --git a/include/llvm/Analysis/LoopAnalysisManager.h b/include/llvm/Analysis/LoopAnalysisManager.h
index 17da516889b0..417ee979ce97 100644
--- a/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/include/llvm/Analysis/LoopAnalysisManager.h
@@ -37,6 +37,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -58,8 +59,12 @@ struct LoopStandardAnalysisResults {
   ScalarEvolution &SE;
   TargetLibraryInfo &TLI;
   TargetTransformInfo &TTI;
+  MemorySSA *MSSA;
 };
 
+/// Enables MemorySSA as a dependency for loop passes.
+extern cl::opt<bool> EnableMSSALoopDependency;
+
 /// Extern template declaration for the analysis set for this IR unit.
 extern template class AllAnalysesOn<Loop>;
 
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 70ce9a870517..28afc39727fa 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -29,7 +29,7 @@
 // in the CFG.
There can be strongly connected components in the CFG which
 // this analysis will not recognize and that will not be represented by a Loop
 // instance. In particular, a Loop might be inside such a non-loop SCC, or a
-// non-loop SCC might contain a sub-SCC which is a Loop. 
+// non-loop SCC might contain a sub-SCC which is a Loop.
 //
 //===----------------------------------------------------------------------===//
@@ -46,7 +46,9 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
 #include <algorithm>
+#include <utility>
 
 namespace llvm {
 
@@ -56,110 +58,145 @@ class Loop;
 class MDNode;
 class PHINode;
 class raw_ostream;
-template <class N, bool IsPostDom>
-class DominatorTreeBase;
-template<class N, class M> class LoopInfoBase;
-template<class N, class M> class LoopBase;
+template <class N, bool IsPostDom> class DominatorTreeBase;
+template <class N, class M> class LoopInfoBase;
+template <class N, class M> class LoopBase;
 
 //===----------------------------------------------------------------------===//
 /// Instances of this class are used to represent loops that are detected in the
 /// flow graph.
 ///
-template<class BlockT, class LoopT>
-class LoopBase {
+template <class BlockT, class LoopT> class LoopBase {
   LoopT *ParentLoop;
   // Loops contained entirely within this one.
   std::vector<LoopT *> SubLoops;
 
   // The list of blocks in this loop. First entry is the header node.
-  std::vector<BlockT*> Blocks;
+  std::vector<BlockT *> Blocks;
 
-  SmallPtrSet<const BlockT*, 8> DenseBlockSet;
+  SmallPtrSet<const BlockT *, 8> DenseBlockSet;
 
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
   /// Indicator that this loop is no longer a valid loop.
   bool IsInvalid = false;
+#endif
 
   LoopBase(const LoopBase<BlockT, LoopT> &) = delete;
-  const LoopBase<BlockT, LoopT>&
-  operator=(const LoopBase<BlockT, LoopT> &) = delete;
-public:
-  /// This creates an empty loop.
-  LoopBase() : ParentLoop(nullptr) {}
-  ~LoopBase() {
-    for (size_t i = 0, e = SubLoops.size(); i != e; ++i)
-      delete SubLoops[i];
-  }
+  const LoopBase<BlockT, LoopT> &
+  operator=(const LoopBase<BlockT, LoopT> &) = delete;
 
+public:
   /// Return the nesting level of this loop. An outer-most loop has depth 1,
   /// for consistency with loop depth values used for basic blocks, where depth
   /// 0 is used for blocks not inside any loops.
   unsigned getLoopDepth() const {
+    assert(!isInvalid() && "Loop not in a valid state!");
     unsigned D = 1;
     for (const LoopT *CurLoop = ParentLoop; CurLoop;
          CurLoop = CurLoop->ParentLoop)
       ++D;
     return D;
   }
-  BlockT *getHeader() const { return Blocks.front(); }
+  BlockT *getHeader() const { return getBlocks().front(); }
   LoopT *getParentLoop() const { return ParentLoop; }
 
   /// This is a raw interface for bypassing addChildLoop.
-  void setParentLoop(LoopT *L) { ParentLoop = L; }
+  void setParentLoop(LoopT *L) {
+    assert(!isInvalid() && "Loop not in a valid state!");
+    ParentLoop = L;
+  }
 
   /// Return true if the specified loop is contained within this loop.
   bool contains(const LoopT *L) const {
-    if (L == this) return true;
-    if (!L) return false;
+    assert(!isInvalid() && "Loop not in a valid state!");
+    if (L == this)
+      return true;
+    if (!L)
+      return false;
     return contains(L->getParentLoop());
   }
 
   /// Return true if the specified basic block is in this loop.
   bool contains(const BlockT *BB) const {
+    assert(!isInvalid() && "Loop not in a valid state!");
    return DenseBlockSet.count(BB);
   }
 
   /// Return true if the specified instruction is in this loop.
-  template<class InstT>
-  bool contains(const InstT *Inst) const {
+  template <class InstT> bool contains(const InstT *Inst) const {
     return contains(Inst->getParent());
   }
 
   /// Return the loops contained entirely within this loop.
-  const std::vector<LoopT *> &getSubLoops() const { return SubLoops; }
-  std::vector<LoopT *> &getSubLoopsVector() { return SubLoops; }
+  const std::vector<LoopT *> &getSubLoops() const {
+    assert(!isInvalid() && "Loop not in a valid state!");
+    return SubLoops;
+  }
+  std::vector<LoopT *> &getSubLoopsVector() {
+    assert(!isInvalid() && "Loop not in a valid state!");
+    return SubLoops;
+  }
   typedef typename std::vector<LoopT *>::const_iterator iterator;
-  typedef typename std::vector<LoopT *>::const_reverse_iterator
-      reverse_iterator;
-  iterator begin() const { return SubLoops.begin(); }
-  iterator end() const { return SubLoops.end(); }
-  reverse_iterator rbegin() const { return SubLoops.rbegin(); }
-  reverse_iterator rend() const { return SubLoops.rend(); }
-  bool empty() const { return SubLoops.empty(); }
+  typedef
+      typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
+  iterator begin() const { return getSubLoops().begin(); }
+  iterator end() const { return getSubLoops().end(); }
+  reverse_iterator rbegin() const { return getSubLoops().rbegin(); }
+  reverse_iterator rend() const { return getSubLoops().rend(); }
+  bool empty() const { return getSubLoops().empty(); }
 
   /// Get a list of the basic blocks which make up this loop.
-  const std::vector<BlockT*> &getBlocks() const { return Blocks; }
-  typedef typename std::vector<BlockT*>::const_iterator block_iterator;
-  block_iterator block_begin() const { return Blocks.begin(); }
-  block_iterator block_end() const { return Blocks.end(); }
+  ArrayRef<BlockT *> getBlocks() const {
+    assert(!isInvalid() && "Loop not in a valid state!");
+    return Blocks;
+  }
+  typedef typename ArrayRef<BlockT *>::const_iterator block_iterator;
+  block_iterator block_begin() const { return getBlocks().begin(); }
+  block_iterator block_end() const { return getBlocks().end(); }
   inline iterator_range<block_iterator> blocks() const {
+    assert(!isInvalid() && "Loop not in a valid state!");
     return make_range(block_begin(), block_end());
   }
 
   /// Get the number of blocks in this loop in constant time.
   unsigned getNumBlocks() const {
+    assert(!isInvalid() && "Loop not in a valid state!");
     return Blocks.size();
   }
 
-  /// Invalidate the loop, indicating that it is no longer a loop.
-  void invalidate() { IsInvalid = true; }
-
-  /// Return true if this loop is no longer valid.
-  bool isInvalid() { return IsInvalid; }
+  /// Return a direct, mutable handle to the blocks vector so that we can
+  /// mutate it efficiently with techniques like `std::remove`.
+  std::vector<BlockT *> &getBlocksVector() {
+    assert(!isInvalid() && "Loop not in a valid state!");
+    return Blocks;
+  }
+  /// Return a direct, mutable handle to the blocks set so that we can
+  /// mutate it efficiently.
+  SmallPtrSetImpl<const BlockT *> &getBlocksSet() {
+    assert(!isInvalid() && "Loop not in a valid state!");
+    return DenseBlockSet;
+  }
+
+  /// Return true if this loop is no longer valid. The only valid use of this
+  /// helper is "assert(!L.isInvalid())" or equivalent, since IsInvalid is set to
+  /// true by the destructor.
In other words, if this accessor returns true, + /// the caller has already triggered UB by calling this accessor; and so it + /// can only be called in a context where a return value of true indicates a + /// programmer error. + bool isInvalid() const { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + return IsInvalid; +#else + return false; +#endif + } /// True if terminator in the block can branch to another block that is /// outside of the current loop. bool isLoopExiting(const BlockT *BB) const { - for (const auto &Succ : children<const BlockT*>(BB)) { + assert(!isInvalid() && "Loop not in a valid state!"); + for (const auto &Succ : children<const BlockT *>(BB)) { if (!contains(Succ)) return true; } @@ -171,20 +208,22 @@ public: /// This function is useful when there are multiple latches in a loop /// because \fn getLoopLatch will return nullptr in that case. bool isLoopLatch(const BlockT *BB) const { + assert(!isInvalid() && "Loop not in a valid state!"); assert(contains(BB) && "block does not belong to the loop"); BlockT *Header = getHeader(); - auto PredBegin = GraphTraits<Inverse<BlockT*> >::child_begin(Header); - auto PredEnd = GraphTraits<Inverse<BlockT*> >::child_end(Header); + auto PredBegin = GraphTraits<Inverse<BlockT *>>::child_begin(Header); + auto PredEnd = GraphTraits<Inverse<BlockT *>>::child_end(Header); return std::find(PredBegin, PredEnd, BB) != PredEnd; } /// Calculate the number of back edges to the loop header. unsigned getNumBackEdges() const { + assert(!isInvalid() && "Loop not in a valid state!"); unsigned NumBackEdges = 0; BlockT *H = getHeader(); - for (const auto Pred : children<Inverse<BlockT*> >(H)) + for (const auto Pred : children<Inverse<BlockT *>>(H)) if (contains(Pred)) ++NumBackEdges; @@ -210,14 +249,14 @@ public: /// Return all of the successor blocks of this loop. These are the blocks /// _outside of the current loop_ which are branched to. - void getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const; + void getExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const; /// If getExitBlocks would return exactly one block, return that block. /// Otherwise return null. BlockT *getExitBlock() const; /// Edge type. - typedef std::pair<const BlockT*, const BlockT*> Edge; + typedef std::pair<const BlockT *, const BlockT *> Edge; /// Return all pairs of (_inside_block_,_outside_block_). void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const; @@ -243,8 +282,9 @@ public: /// Return all loop latch blocks of this loop. A latch block is a block that /// contains a branch back to the header. void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const { + assert(!isInvalid() && "Loop not in a valid state!"); BlockT *H = getHeader(); - for (const auto Pred : children<Inverse<BlockT*>>(H)) + for (const auto Pred : children<Inverse<BlockT *>>(H)) if (contains(Pred)) LoopLatches.push_back(Pred); } @@ -269,6 +309,7 @@ public: /// Add the specified loop to be a child of this loop. /// This updates the loop depth of the new child. void addChildLoop(LoopT *NewChild) { + assert(!isInvalid() && "Loop not in a valid state!"); assert(!NewChild->ParentLoop && "NewChild already has a parent!"); NewChild->ParentLoop = static_cast<LoopT *>(this); SubLoops.push_back(NewChild); @@ -277,37 +318,49 @@ public: /// This removes the specified child from being a subloop of this loop. The /// loop is not deleted, as it will presumably be inserted into another loop. 
LoopT *removeChildLoop(iterator I) {
+    assert(!isInvalid() && "Loop not in a valid state!");
     assert(I != SubLoops.end() && "Cannot remove end iterator!");
     LoopT *Child = *I;
     assert(Child->ParentLoop == this && "Child is not a child of this loop!");
-    SubLoops.erase(SubLoops.begin()+(I-begin()));
+    SubLoops.erase(SubLoops.begin() + (I - begin()));
     Child->ParentLoop = nullptr;
     return Child;
   }
 
+  /// This removes the specified child from being a subloop of this loop. The
+  /// loop is not deleted, as it will presumably be inserted into another loop.
+  LoopT *removeChildLoop(LoopT *Child) {
+    return removeChildLoop(llvm::find(*this, Child));
+  }
+
   /// This adds a basic block directly to the basic block list.
   /// This should only be used by transformations that create new loops. Other
   /// transformations should use addBasicBlockToLoop.
   void addBlockEntry(BlockT *BB) {
+    assert(!isInvalid() && "Loop not in a valid state!");
     Blocks.push_back(BB);
     DenseBlockSet.insert(BB);
   }
 
   /// interface to reverse Blocks[from, end of loop] in this loop
   void reverseBlock(unsigned from) {
+    assert(!isInvalid() && "Loop not in a valid state!");
     std::reverse(Blocks.begin() + from, Blocks.end());
   }
 
   /// interface to do reserve() for Blocks
   void reserveBlocks(unsigned size) {
+    assert(!isInvalid() && "Loop not in a valid state!");
     Blocks.reserve(size);
   }
 
   /// This method is used to move BB (which must be part of this loop) to be the
   /// loop header of the loop (the block that dominates all others).
   void moveToHeader(BlockT *BB) {
-    if (Blocks[0] == BB) return;
-    for (unsigned i = 0; ; ++i) {
+    assert(!isInvalid() && "Loop not in a valid state!");
+    if (Blocks[0] == BB)
+      return;
+    for (unsigned i = 0;; ++i) {
       assert(i != Blocks.size() && "Loop does not contain BB!");
       if (Blocks[i] == BB) {
         Blocks[i] = Blocks[0];
@@ -321,6 +374,7 @@ public:
   /// Blocks as appropriate. This does not update the mapping in the LoopInfo
   /// class.
   void removeBlockFromLoop(BlockT *BB) {
+    assert(!isInvalid() && "Loop not in a valid state!");
     auto I = find(Blocks, BB);
     assert(I != Blocks.end() && "N is not in this list!");
     Blocks.erase(I);
@@ -332,21 +386,47 @@ public:
   void verifyLoop() const;
 
   /// Verify loop structure of this loop and all nested loops.
-  void verifyLoopNest(DenseSet<const LoopT*> *Loops) const;
+  void verifyLoopNest(DenseSet<const LoopT *> *Loops) const;
 
   /// Print loop with all the BBs inside it.
   void print(raw_ostream &OS, unsigned Depth = 0, bool Verbose = false) const;
 
 protected:
   friend class LoopInfoBase<BlockT, LoopT>;
+
+  /// This creates an empty loop.
+  LoopBase() : ParentLoop(nullptr) {}
+
   explicit LoopBase(BlockT *BB) : ParentLoop(nullptr) {
     Blocks.push_back(BB);
     DenseBlockSet.insert(BB);
   }
+
+  // Since loop passes like SCEV are allowed to key analysis results off of
+  // `Loop` pointers, we cannot re-use pointers within a loop pass manager.
+  // This means loop passes should not be `delete`-ing `Loop` objects directly
+  // (and risk a later `Loop` allocation re-using the address of a previous
+  // one) but should be using LoopInfo::erase, which keeps around the `Loop`
+  // pointer until the end of the lifetime of the `LoopInfo` object.
+  //
+  // To make it easier to follow this rule, we mark the destructor as
+  // non-public.
+  ~LoopBase() {
+    for (auto *SubLoop : SubLoops)
+      SubLoop->~LoopT();
+
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+    IsInvalid = true;
+#endif
+    SubLoops.clear();
+    Blocks.clear();
+    DenseBlockSet.clear();
+    ParentLoop = nullptr;
+  }
 };
 
-template<class BlockT, class LoopT>
-raw_ostream& operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) {
+template <class BlockT, class LoopT>
+raw_ostream &operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) {
   Loop.print(OS);
   return OS;
 }
@@ -354,7 +434,6 @@ raw_ostream& operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) {
 // Implementation in LoopInfoImpl.h
 extern template class LoopBase<BasicBlock, Loop>;
 
-
 /// Represents a single loop in the control flow graph. Note that not all SCCs
 /// in the CFG are necessarily loops.
 class Loop : public LoopBase<BasicBlock, Loop> {
@@ -367,21 +446,17 @@ public:
   public:
     LocRange() {}
     LocRange(DebugLoc Start) : Start(Start), End(std::move(Start)) {}
-    LocRange(DebugLoc Start, DebugLoc End) : Start(std::move(Start)),
-                                             End(std::move(End)) {}
+    LocRange(DebugLoc Start, DebugLoc End)
+        : Start(std::move(Start)), End(std::move(End)) {}
 
     const DebugLoc &getStart() const { return Start; }
     const DebugLoc &getEnd() const { return End; }
 
     /// \brief Check for null.
     ///
-    explicit operator bool() const {
-      return Start && End;
-    }
+    explicit operator bool() const { return Start && End; }
   };
 
-  Loop() {}
-
   /// Return true if the specified value is loop invariant.
   bool isLoopInvariant(const Value *V) const;
 
@@ -464,6 +539,14 @@ public:
   /// operand should be the node itself.
   void setLoopID(MDNode *LoopID) const;
 
+  /// Add llvm.loop.unroll.disable to this loop's loop id metadata.
+  ///
+  /// Remove existing unroll metadata and add unroll disable metadata to
+  /// indicate the loop has already been unrolled. This prevents a loop
+  /// from being unrolled more than is directed by a pragma if the loop
+  /// unrolling pass is run more than once (which it generally is).
+  void setLoopAlreadyUnrolled();
+
   /// Return true if no exit block for the loop has a predecessor that is
   /// outside the loop.
   bool hasDedicatedExits() const;
@@ -499,8 +582,12 @@ public:
   }
 
 private:
+  Loop() = default;
+
   friend class LoopInfoBase<BasicBlock, Loop>;
+  friend class LoopBase<BasicBlock, Loop>;
   explicit Loop(BasicBlock *BB) : LoopBase<BasicBlock, Loop>(BB) {}
+  ~Loop() = default;
 };
 
 //===----------------------------------------------------------------------===//
@@ -508,25 +595,26 @@ private:
 /// structures in the specified function.
 ///
-template<class BlockT, class LoopT>
-class LoopInfoBase {
+template <class BlockT, class LoopT> class LoopInfoBase {
   // BBMap - Mapping of basic blocks to the innermost loop they occur in
   DenseMap<const BlockT *, LoopT *> BBMap;
   std::vector<LoopT *> TopLevelLoops;
-  std::vector<LoopT *> RemovedLoops;
+  BumpPtrAllocator LoopAllocator;
 
   friend class LoopBase<BlockT, LoopT>;
   friend class LoopInfo;
 
   void operator=(const LoopInfoBase &) = delete;
   LoopInfoBase(const LoopInfoBase &) = delete;
+
 public:
-  LoopInfoBase() { }
+  LoopInfoBase() {}
   ~LoopInfoBase() { releaseMemory(); }
 
   LoopInfoBase(LoopInfoBase &&Arg)
       : BBMap(std::move(Arg.BBMap)),
-        TopLevelLoops(std::move(Arg.TopLevelLoops)) {
+        TopLevelLoops(std::move(Arg.TopLevelLoops)),
+        LoopAllocator(std::move(Arg.LoopAllocator)) {
     // We have to clear the argument's top-level loops as we've taken ownership.
    Arg.TopLevelLoops.clear();
   }
@@ -534,8 +622,10 @@ public:
     BBMap = std::move(RHS.BBMap);
 
     for (auto *L : TopLevelLoops)
-      delete L;
+      L->~LoopT();
+
     TopLevelLoops = std::move(RHS.TopLevelLoops);
+    LoopAllocator = std::move(RHS.LoopAllocator);
     RHS.TopLevelLoops.clear();
     return *this;
   }
@@ -544,19 +634,22 @@ public:
     BBMap.clear();
 
     for (auto *L : TopLevelLoops)
-      delete L;
+      L->~LoopT();
     TopLevelLoops.clear();
-    for (auto *L : RemovedLoops)
-      delete L;
-    RemovedLoops.clear();
+    LoopAllocator.Reset();
+  }
+
+  template <typename... ArgsTy> LoopT *AllocateLoop(ArgsTy &&... Args) {
+    LoopT *Storage = LoopAllocator.Allocate<LoopT>();
+    return new (Storage) LoopT(std::forward<ArgsTy>(Args)...);
   }
 
   /// iterator/begin/end - The interface to the top-level loops in the current
   /// function.
   ///
   typedef typename std::vector<LoopT *>::const_iterator iterator;
-  typedef typename std::vector<LoopT *>::const_reverse_iterator
-      reverse_iterator;
+  typedef
+      typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
   iterator begin() const { return TopLevelLoops.begin(); }
   iterator end() const { return TopLevelLoops.end(); }
   reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); }
@@ -585,9 +678,7 @@ public:
   LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); }
 
   /// Same as getLoopFor.
-  const LoopT *operator[](const BlockT *BB) const {
-    return getLoopFor(BB);
-  }
+  const LoopT *operator[](const BlockT *BB) const { return getLoopFor(BB); }
 
   /// Return the loop nesting level of the specified block. A depth of 0 means
   /// the block is not inside any loop.
@@ -609,7 +700,7 @@ public:
     assert(I != end() && "Cannot remove end iterator!");
     LoopT *L = *I;
     assert(!L->getParentLoop() && "Not a top-level loop!");
-    TopLevelLoops.erase(TopLevelLoops.begin() + (I-begin()));
+    TopLevelLoops.erase(TopLevelLoops.begin() + (I - begin()));
     return L;
   }
 
@@ -626,8 +717,7 @@ public:
 
   /// Replace the specified loop in the top-level loops list with the indicated
   /// loop.
-  void changeTopLevelLoop(LoopT *OldLoop,
-                          LoopT *NewLoop) {
+  void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop) {
     auto I = find(TopLevelLoops, OldLoop);
     assert(I != TopLevelLoops.end() && "Old loop not at top level!");
     *I = NewLoop;
@@ -658,8 +748,10 @@ public:
 
   static bool isNotAlreadyContainedIn(const LoopT *SubLoop,
                                       const LoopT *ParentLoop) {
-    if (!SubLoop) return true;
-    if (SubLoop == ParentLoop) return false;
+    if (!SubLoop)
+      return true;
+    if (SubLoop == ParentLoop)
+      return false;
     return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
   }
 
@@ -670,6 +762,24 @@ public:
   void print(raw_ostream &OS) const;
 
   void verify(const DominatorTreeBase<BlockT, false> &DomTree) const;
+
+  /// Destroy a loop that has been removed from the `LoopInfo` nest.
+  ///
+  /// This runs the destructor of the loop object, making it invalid to
+  /// reference afterward. The memory is retained so that the *pointer* to the
+  /// loop remains valid.
+  ///
+  /// The caller is responsible for removing this loop from the loop nest and
+  /// otherwise disconnecting it from the broader `LoopInfo` data structures.
+  /// Callers that don't naturally handle this themselves should probably call
+  /// `erase` instead.
+  void destroy(LoopT *L) {
+    L->~LoopT();
+
+    // Since LoopAllocator is a BumpPtrAllocator, this Deallocate only poisons
+    // \c L, but the pointer remains valid for non-dereferencing uses.
+ LoopAllocator.Deallocate(L); + } }; // Implementation in LoopInfoImpl.h @@ -682,6 +792,7 @@ class LoopInfo : public LoopInfoBase<BasicBlock, Loop> { void operator=(const LoopInfo &) = delete; LoopInfo(const LoopInfo &) = delete; + public: LoopInfo() {} explicit LoopInfo(const DominatorTreeBase<BasicBlock, false> &DomTree); @@ -702,7 +813,7 @@ public: /// the loop forest and parent loops for each block so that \c L is no longer /// referenced, but does not actually delete \c L immediately. The pointer /// will remain valid until this LoopInfo's memory is released. - void markAsRemoved(Loop *L); + void erase(Loop *L); /// Returns true if replacing From with To everywhere is guaranteed to /// preserve LCSSA form. @@ -710,7 +821,8 @@ public: // Preserving LCSSA form is only problematic if the replacing value is an // instruction. Instruction *I = dyn_cast<Instruction>(To); - if (!I) return true; + if (!I) + return true; // If both instructions are defined in the same basic block then replacement // cannot break LCSSA form. if (I->getParent() == From->getParent()) @@ -718,7 +830,8 @@ public: // If the instruction is not defined in a loop then it can safely replace // anything. Loop *ToLoop = getLoopFor(I->getParent()); - if (!ToLoop) return true; + if (!ToLoop) + return true; // If the replacing instruction is defined in the same loop as the original // instruction, or in a loop that contains it as an inner loop, then using // it as a replacement will not break LCSSA form. @@ -798,7 +911,7 @@ public: }; // Allow clients to walk the list of nested loops... -template <> struct GraphTraits<const Loop*> { +template <> struct GraphTraits<const Loop *> { typedef const Loop *NodeRef; typedef LoopInfo::iterator ChildIteratorType; @@ -807,7 +920,7 @@ template <> struct GraphTraits<const Loop*> { static ChildIteratorType child_end(NodeRef N) { return N->end(); } }; -template <> struct GraphTraits<Loop*> { +template <> struct GraphTraits<Loop *> { typedef Loop *NodeRef; typedef LoopInfo::iterator ChildIteratorType; diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index e9177e68ed77..b3a16b5369f7 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -31,11 +31,12 @@ namespace llvm { /// outside of the loop. These are the blocks _inside of the current loop_ /// which branch out. The returned list is always unique. /// -template<class BlockT, class LoopT> -void LoopBase<BlockT, LoopT>:: -getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const { +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::getExitingBlocks( + SmallVectorImpl<BlockT *> &ExitingBlocks) const { + assert(!isInvalid() && "Loop not in a valid state!"); for (const auto BB : blocks()) - for (const auto &Succ : children<BlockT*>(BB)) + for (const auto &Succ : children<BlockT *>(BB)) if (!contains(Succ)) { // Not in current loop? It must be an exit block. ExitingBlocks.push_back(BB); @@ -45,9 +46,10 @@ getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const { /// getExitingBlock - If getExitingBlocks would return exactly one block, /// return that block. Otherwise return null. 
-template<class BlockT, class LoopT> +template <class BlockT, class LoopT> BlockT *LoopBase<BlockT, LoopT>::getExitingBlock() const { - SmallVector<BlockT*, 8> ExitingBlocks; + assert(!isInvalid() && "Loop not in a valid state!"); + SmallVector<BlockT *, 8> ExitingBlocks; getExitingBlocks(ExitingBlocks); if (ExitingBlocks.size() == 1) return ExitingBlocks[0]; @@ -57,11 +59,12 @@ BlockT *LoopBase<BlockT, LoopT>::getExitingBlock() const { /// getExitBlocks - Return all of the successor blocks of this loop. These /// are the blocks _outside of the current loop_ which are branched to. /// -template<class BlockT, class LoopT> -void LoopBase<BlockT, LoopT>:: -getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const { +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::getExitBlocks( + SmallVectorImpl<BlockT *> &ExitBlocks) const { + assert(!isInvalid() && "Loop not in a valid state!"); for (const auto BB : blocks()) - for (const auto &Succ : children<BlockT*>(BB)) + for (const auto &Succ : children<BlockT *>(BB)) if (!contains(Succ)) // Not in current loop? It must be an exit block. ExitBlocks.push_back(Succ); @@ -69,9 +72,10 @@ getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const { /// getExitBlock - If getExitBlocks would return exactly one block, /// return that block. Otherwise return null. -template<class BlockT, class LoopT> +template <class BlockT, class LoopT> BlockT *LoopBase<BlockT, LoopT>::getExitBlock() const { - SmallVector<BlockT*, 8> ExitBlocks; + assert(!isInvalid() && "Loop not in a valid state!"); + SmallVector<BlockT *, 8> ExitBlocks; getExitBlocks(ExitBlocks); if (ExitBlocks.size() == 1) return ExitBlocks[0]; @@ -79,11 +83,12 @@ BlockT *LoopBase<BlockT, LoopT>::getExitBlock() const { } /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_). -template<class BlockT, class LoopT> -void LoopBase<BlockT, LoopT>:: -getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const { +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::getExitEdges( + SmallVectorImpl<Edge> &ExitEdges) const { + assert(!isInvalid() && "Loop not in a valid state!"); for (const auto BB : blocks()) - for (const auto &Succ : children<BlockT*>(BB)) + for (const auto &Succ : children<BlockT *>(BB)) if (!contains(Succ)) // Not in current loop? It must be an exit block. ExitEdges.emplace_back(BB, Succ); @@ -97,22 +102,24 @@ getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const { /// /// This method returns null if there is no preheader for the loop. /// -template<class BlockT, class LoopT> +template <class BlockT, class LoopT> BlockT *LoopBase<BlockT, LoopT>::getLoopPreheader() const { + assert(!isInvalid() && "Loop not in a valid state!"); // Keep track of nodes outside the loop branching to the header... BlockT *Out = getLoopPredecessor(); - if (!Out) return nullptr; + if (!Out) + return nullptr; // Make sure we are allowed to hoist instructions into the predecessor. if (!Out->isLegalToHoistInto()) return nullptr; // Make sure there is only one exit out of the preheader. - typedef GraphTraits<BlockT*> BlockTraits; + typedef GraphTraits<BlockT *> BlockTraits; typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out); ++SI; if (SI != BlockTraits::child_end(Out)) - return nullptr; // Multiple exits from the block, must not be a preheader. + return nullptr; // Multiple exits from the block, must not be a preheader. // The predecessor has exactly one successor, so it is a preheader. 
return Out;
@@ -123,17 +130,18 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPreheader() const {
 
 /// getLoopPredecessor - If the given loop's header has exactly one unique
 /// predecessor outside the loop, return it. Otherwise return null.
 /// This is less strict than the loop "preheader" concept, which requires
 /// the predecessor to have exactly one successor.
 ///
-template<class BlockT, class LoopT>
+template <class BlockT, class LoopT>
 BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const {
+  assert(!isInvalid() && "Loop not in a valid state!");
   // Keep track of nodes outside the loop branching to the header...
   BlockT *Out = nullptr;
 
   // Loop over the predecessors of the header node...
   BlockT *Header = getHeader();
-  for (const auto Pred : children<Inverse<BlockT*>>(Header)) {
-    if (!contains(Pred)) {     // If the block is not in the loop...
+  for (const auto Pred : children<Inverse<BlockT *>>(Header)) {
+    if (!contains(Pred)) { // If the block is not in the loop...
       if (Out && Out != Pred)
-        return nullptr;     // Multiple predecessors outside the loop
+        return nullptr; // Multiple predecessors outside the loop
       Out = Pred;
     }
   }
@@ -145,13 +153,15 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const {
 
 /// getLoopLatch - If there is a single latch block for this loop, return it.
 /// A latch block is a block that contains a branch back to the header.
-template<class BlockT, class LoopT>
+template <class BlockT, class LoopT>
 BlockT *LoopBase<BlockT, LoopT>::getLoopLatch() const {
+  assert(!isInvalid() && "Loop not in a valid state!");
   BlockT *Header = getHeader();
   BlockT *Latch = nullptr;
-  for (const auto Pred : children<Inverse<BlockT*>>(Header)) {
+  for (const auto Pred : children<Inverse<BlockT *>>(Header)) {
     if (contains(Pred)) {
-      if (Latch) return nullptr;
+      if (Latch)
+        return nullptr;
       Latch = Pred;
     }
  }
@@ -169,14 +179,15 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopLatch() const {
 /// to the specified LoopInfo object as being in the current basic block. It
 /// is not valid to replace the loop header with this method.
 ///
-template<class BlockT, class LoopT>
-void LoopBase<BlockT, LoopT>::
-addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) {
+template <class BlockT, class LoopT>
+void LoopBase<BlockT, LoopT>::addBasicBlockToLoop(
+    BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) {
+  assert(!isInvalid() && "Loop not in a valid state!");
 #ifndef NDEBUG
   if (!Blocks.empty()) {
     auto SameHeader = LIB[getHeader()];
-    assert(contains(SameHeader) && getHeader() == SameHeader->getHeader()
-           && "Incorrect LI specified for this loop!");
+    assert(contains(SameHeader) && getHeader() == SameHeader->getHeader() &&
+           "Incorrect LI specified for this loop!");
   }
 #endif
   assert(NewBB && "Cannot add a null basic block to the loop!");
@@ -198,9 +209,10 @@ addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) {
 /// the OldChild entry in our children list with NewChild, and updates the
 /// parent pointer of OldChild to be null and the NewChild to be this loop.
 /// This updates the loop depth of the new child.
-template<class BlockT, class LoopT> -void LoopBase<BlockT, LoopT>:: -replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild) { +template <class BlockT, class LoopT> +void LoopBase<BlockT, LoopT>::replaceChildLoopWith(LoopT *OldChild, + LoopT *NewChild) { + assert(!isInvalid() && "Loop not in a valid state!"); assert(OldChild->ParentLoop == this && "This loop is already broken!"); assert(!NewChild->ParentLoop && "NewChild already has a parent!"); typename std::vector<LoopT *>::iterator I = find(SubLoops, OldChild); @@ -211,46 +223,48 @@ replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild) { } /// verifyLoop - Verify loop structure -template<class BlockT, class LoopT> +template <class BlockT, class LoopT> void LoopBase<BlockT, LoopT>::verifyLoop() const { + assert(!isInvalid() && "Loop not in a valid state!"); #ifndef NDEBUG assert(!Blocks.empty() && "Loop header is missing"); // Setup for using a depth-first iterator to visit every block in the loop. - SmallVector<BlockT*, 8> ExitBBs; + SmallVector<BlockT *, 8> ExitBBs; getExitBlocks(ExitBBs); - df_iterator_default_set<BlockT*> VisitSet; + df_iterator_default_set<BlockT *> VisitSet; VisitSet.insert(ExitBBs.begin(), ExitBBs.end()); - df_ext_iterator<BlockT*, df_iterator_default_set<BlockT*>> - BI = df_ext_begin(getHeader(), VisitSet), - BE = df_ext_end(getHeader(), VisitSet); + df_ext_iterator<BlockT *, df_iterator_default_set<BlockT *>> + BI = df_ext_begin(getHeader(), VisitSet), + BE = df_ext_end(getHeader(), VisitSet); // Keep track of the BBs visited. - SmallPtrSet<BlockT*, 8> VisitedBBs; + SmallPtrSet<BlockT *, 8> VisitedBBs; // Check the individual blocks. - for ( ; BI != BE; ++BI) { + for (; BI != BE; ++BI) { BlockT *BB = *BI; - assert(std::any_of(GraphTraits<BlockT*>::child_begin(BB), - GraphTraits<BlockT*>::child_end(BB), - [&](BlockT *B){return contains(B);}) && + assert(std::any_of(GraphTraits<BlockT *>::child_begin(BB), + GraphTraits<BlockT *>::child_end(BB), + [&](BlockT *B) { return contains(B); }) && "Loop block has no in-loop successors!"); - assert(std::any_of(GraphTraits<Inverse<BlockT*> >::child_begin(BB), - GraphTraits<Inverse<BlockT*> >::child_end(BB), - [&](BlockT *B){return contains(B);}) && + assert(std::any_of(GraphTraits<Inverse<BlockT *>>::child_begin(BB), + GraphTraits<Inverse<BlockT *>>::child_end(BB), + [&](BlockT *B) { return contains(B); }) && "Loop block has no in-loop predecessors!"); SmallVector<BlockT *, 2> OutsideLoopPreds; - std::for_each(GraphTraits<Inverse<BlockT*> >::child_begin(BB), - GraphTraits<Inverse<BlockT*> >::child_end(BB), - [&](BlockT *B){if (!contains(B)) + std::for_each(GraphTraits<Inverse<BlockT *>>::child_begin(BB), + GraphTraits<Inverse<BlockT *>>::child_end(BB), + [&](BlockT *B) { + if (!contains(B)) OutsideLoopPreds.push_back(B); }); if (BB == getHeader()) { - assert(!OutsideLoopPreds.empty() && "Loop is unreachable!"); + assert(!OutsideLoopPreds.empty() && "Loop is unreachable!"); } else if (!OutsideLoopPreds.empty()) { // A non-header loop shouldn't be reachable from outside the loop, // though it is permitted if the predecessor is not itself actually @@ -282,8 +296,8 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const { // Each block in each subloop should be contained within this loop. for (block_iterator BI = (*I)->block_begin(), BE = (*I)->block_end(); BI != BE; ++BI) { - assert(contains(*BI) && - "Loop does not contain all the blocks of a subloop!"); + assert(contains(*BI) && + "Loop does not contain all the blocks of a subloop!"); } // Check the parent loop pointer. 
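Since nearly every LoopBase accessor above now asserts !isInvalid(), callers must query a loop before it is destroyed through the new allocator path. A small illustrative sketch (not part of the patch; describeLoop is a made-up helper):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/LoopInfo.h"
  #include "llvm/Support/raw_ostream.h"

  // Summarize the canonical-form blocks of a live loop. Every accessor
  // used here fires the "Loop not in a valid state!" assertion if the
  // loop has already been destroyed.
  static void describeLoop(const llvm::Loop &L) {
    using namespace llvm;
    if (BasicBlock *Preheader = L.getLoopPreheader())
      errs() << "preheader: " << Preheader->getName() << "\n";
    if (BasicBlock *Latch = L.getLoopLatch())
      errs() << "single latch: " << Latch->getName() << "\n";

    SmallVector<BasicBlock *, 8> ExitingBlocks;
    L.getExitingBlocks(ExitingBlocks);
    errs() << ExitingBlocks.size() << " exiting block(s), loop depth "
           << L.getLoopDepth() << "\n";
  }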
@@ -295,9 +309,10 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const { } /// verifyLoop - Verify loop structure of this loop and all nested loops. -template<class BlockT, class LoopT> +template <class BlockT, class LoopT> void LoopBase<BlockT, LoopT>::verifyLoopNest( - DenseSet<const LoopT*> *Loops) const { + DenseSet<const LoopT *> *Loops) const { + assert(!isInvalid() && "Loop not in a valid state!"); Loops->insert(static_cast<const LoopT *>(this)); // Verify this loop. verifyLoop(); @@ -306,30 +321,34 @@ void LoopBase<BlockT, LoopT>::verifyLoopNest( (*I)->verifyLoopNest(Loops); } -template<class BlockT, class LoopT> +template <class BlockT, class LoopT> void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth, bool Verbose) const { - OS.indent(Depth*2) << "Loop at depth " << getLoopDepth() - << " containing: "; + OS.indent(Depth * 2) << "Loop at depth " << getLoopDepth() << " containing: "; BlockT *H = getHeader(); for (unsigned i = 0; i < getBlocks().size(); ++i) { BlockT *BB = getBlocks()[i]; if (!Verbose) { - if (i) OS << ","; + if (i) + OS << ","; BB->printAsOperand(OS, false); - } else OS << "\n"; - - if (BB == H) OS << "<header>"; - if (isLoopLatch(BB)) OS << "<latch>"; - if (isLoopExiting(BB)) OS << "<exiting>"; + } else + OS << "\n"; + + if (BB == H) + OS << "<header>"; + if (isLoopLatch(BB)) + OS << "<latch>"; + if (isLoopExiting(BB)) + OS << "<exiting>"; if (Verbose) BB->print(OS); } OS << "\n"; for (iterator I = begin(), E = end(); I != E; ++I) - (*I)->print(OS, Depth+2); + (*I)->print(OS, Depth + 2); } //===----------------------------------------------------------------------===// @@ -341,10 +360,10 @@ void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth, /// this loop are mapped to this loop or a subloop. And all subloops within this /// loop have their parent loop set to this loop or a subloop. template <class BlockT, class LoopT> -static void discoverAndMapSubloop( - LoopT *L, ArrayRef<BlockT *> Backedges, LoopInfoBase<BlockT, LoopT> *LI, - const DomTreeBase<BlockT> &DomTree) { - typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits; +static void discoverAndMapSubloop(LoopT *L, ArrayRef<BlockT *> Backedges, + LoopInfoBase<BlockT, LoopT> *LI, + const DomTreeBase<BlockT> &DomTree) { + typedef GraphTraits<Inverse<BlockT *>> InvBlockTraits; unsigned NumBlocks = 0; unsigned NumSubloops = 0; @@ -364,13 +383,12 @@ static void discoverAndMapSubloop( LI->changeLoopFor(PredBB, L); ++NumBlocks; if (PredBB == L->getHeader()) - continue; + continue; // Push all block predecessors on the worklist. ReverseCFGWorklist.insert(ReverseCFGWorklist.end(), InvBlockTraits::child_begin(PredBB), InvBlockTraits::child_end(PredBB)); - } - else { + } else { // This is a discovered block. Find its outermost discovered loop. while (LoopT *Parent = Subloop->getParentLoop()) Subloop = Parent; @@ -382,13 +400,13 @@ static void discoverAndMapSubloop( // Discover a subloop of this loop. Subloop->setParentLoop(L); ++NumSubloops; - NumBlocks += Subloop->getBlocks().capacity(); + NumBlocks += Subloop->getBlocksVector().capacity(); PredBB = Subloop->getHeader(); // Continue traversal along predecessors that are not loop-back edges from // within this subloop tree itself. Note that a predecessor may directly // reach another subloop that is not yet discovered to be a subloop of // this loop, which we must traverse. 
- for (const auto Pred : children<Inverse<BlockT*>>(PredBB)) { + for (const auto Pred : children<Inverse<BlockT *>>(PredBB)) { if (LI->getLoopFor(Pred) != Subloop) ReverseCFGWorklist.push_back(Pred); } @@ -399,15 +417,14 @@ static void discoverAndMapSubloop( } /// Populate all loop data in a stable order during a single forward DFS. -template<class BlockT, class LoopT> -class PopulateLoopsDFS { - typedef GraphTraits<BlockT*> BlockTraits; +template <class BlockT, class LoopT> class PopulateLoopsDFS { + typedef GraphTraits<BlockT *> BlockTraits; typedef typename BlockTraits::ChildIteratorType SuccIterTy; LoopInfoBase<BlockT, LoopT> *LI; + public: - PopulateLoopsDFS(LoopInfoBase<BlockT, LoopT> *li): - LI(li) {} + PopulateLoopsDFS(LoopInfoBase<BlockT, LoopT> *li) : LI(li) {} void traverse(BlockT *EntryBlock); @@ -416,7 +433,7 @@ protected: }; /// Top-level driver for the forward DFS within the loop. -template<class BlockT, class LoopT> +template <class BlockT, class LoopT> void PopulateLoopsDFS<BlockT, LoopT>::traverse(BlockT *EntryBlock) { for (BlockT *BB : post_order(EntryBlock)) insertIntoLoop(BB); @@ -425,7 +442,7 @@ void PopulateLoopsDFS<BlockT, LoopT>::traverse(BlockT *EntryBlock) { /// Add a single Block to its ancestor loops in PostOrder. If the block is a /// subloop header, add the subloop to its parent in PostOrder, then reverse the /// Block and Subloop vectors of the now complete subloop to achieve RPO. -template<class BlockT, class LoopT> +template <class BlockT, class LoopT> void PopulateLoopsDFS<BlockT, LoopT>::insertIntoLoop(BlockT *Block) { LoopT *Subloop = LI->getLoopFor(Block); if (Subloop && Block == Subloop->getHeader()) { @@ -463,8 +480,7 @@ void PopulateLoopsDFS<BlockT, LoopT>::insertIntoLoop(BlockT *Block) { /// The Block vectors are inclusive, so step 3 requires loop-depth number of /// insertions per block. template <class BlockT, class LoopT> -void LoopInfoBase<BlockT, LoopT>::analyze( - const DomTreeBase<BlockT> &DomTree) { +void LoopInfoBase<BlockT, LoopT>::analyze(const DomTreeBase<BlockT> &DomTree) { // Postorder traversal of the dominator tree. const DomTreeNodeBase<BlockT> *DomRoot = DomTree.getRootNode(); for (auto DomNode : post_order(DomRoot)) { @@ -473,17 +489,17 @@ void LoopInfoBase<BlockT, LoopT>::analyze( SmallVector<BlockT *, 4> Backedges; // Check each predecessor of the potential loop header. - for (const auto Backedge : children<Inverse<BlockT*>>(Header)) { + for (const auto Backedge : children<Inverse<BlockT *>>(Header)) { // If Header dominates predBB, this is a new loop. Collect the backedges. - if (DomTree.dominates(Header, Backedge) - && DomTree.isReachableFromEntry(Backedge)) { + if (DomTree.dominates(Header, Backedge) && + DomTree.isReachableFromEntry(Backedge)) { Backedges.push_back(Backedge); } } // Perform a backward CFG traversal to discover and map blocks in this loop. 
if (!Backedges.empty()) { - LoopT *L = new LoopT(Header); - discoverAndMapSubloop(L, ArrayRef<BlockT*>(Backedges), this, DomTree); + LoopT *L = AllocateLoop(Header); + discoverAndMapSubloop(L, ArrayRef<BlockT *>(Backedges), this, DomTree); } } // Perform a single forward CFG traversal to populate block and subloop @@ -542,7 +558,7 @@ LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() { } // Debugging -template<class BlockT, class LoopT> +template <class BlockT, class LoopT> void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const { for (unsigned i = 0; i < TopLevelLoops.size(); ++i) TopLevelLoops[i]->print(OS); @@ -607,13 +623,13 @@ static void compareLoops(const LoopT *L, const LoopT *OtherL, template <class BlockT, class LoopT> void LoopInfoBase<BlockT, LoopT>::verify( const DomTreeBase<BlockT> &DomTree) const { - DenseSet<const LoopT*> Loops; + DenseSet<const LoopT *> Loops; for (iterator I = begin(), E = end(); I != E; ++I) { assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); (*I)->verifyLoopNest(&Loops); } - // Verify that blocks are mapped to valid loops. +// Verify that blocks are mapped to valid loops. #ifndef NDEBUG for (auto &Entry : BBMap) { const BlockT *BB = Entry.first; diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h index 75e7688bbdc2..86cfecd9df11 100644 --- a/include/llvm/Analysis/LoopPass.h +++ b/include/llvm/Analysis/LoopPass.h @@ -129,6 +129,9 @@ public: // Add a new loop into the loop queue. void addLoop(Loop &L); + // Mark \p L as deleted. + void markLoopAsDeleted(Loop &L); + //===--------------------------------------------------------------------===// /// SimpleAnalysis - Provides simple interface to update analysis info /// maintained by various passes. Note, if required this interface can @@ -152,6 +155,7 @@ private: std::deque<Loop *> LQ; LoopInfo *LI; Loop *CurrentLoop; + bool CurrentLoopDeleted; }; // This pass is required by the LCSSA transformation. 
It is used inside diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 23ab372703ee..7d53e34938b7 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -1,4 +1,4 @@ -//===- llvm/Analysis/MemoryBuiltins.h- Calls to memory builtins -*- C++ -*-===// +//==- llvm/Analysis/MemoryBuiltins.h - Calls to memory builtins --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -15,21 +15,42 @@ #ifndef LLVM_ANALYSIS_MEMORYBUILTINS_H #define LLVM_ANALYSIS_MEMORYBUILTINS_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/TargetFolder.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/DataTypes.h" +#include <cstdint> +#include <utility> namespace llvm { + +class AllocaInst; +class Argument; class CallInst; -class PointerType; +class ConstantInt; +class ConstantPointerNull; class DataLayout; +class ExtractElementInst; +class ExtractValueInst; +class GEPOperator; +class GlobalAlias; +class GlobalVariable; +class Instruction; +class IntegerType; +class IntrinsicInst; +class IntToPtrInst; +class LLVMContext; +class LoadInst; +class PHINode; +class PointerType; +class SelectInst; class TargetLibraryInfo; class Type; +class UndefValue; class Value; /// \brief Tests if a value is a call or invoke to a library function that @@ -71,8 +92,7 @@ bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI); -static inline CallInst *extractMallocCall(Value *I, - const TargetLibraryInfo *TLI) { +inline CallInst *extractMallocCall(Value *I, const TargetLibraryInfo *TLI) { return const_cast<CallInst*>(extractMallocCall((const Value*)I, TLI)); } @@ -106,8 +126,7 @@ Value *getMallocArraySize(CallInst *CI, const DataLayout &DL, /// extractCallocCall - Returns the corresponding CallInst if the instruction /// is a calloc call. const CallInst *extractCallocCall(const Value *I, const TargetLibraryInfo *TLI); -static inline CallInst *extractCallocCall(Value *I, - const TargetLibraryInfo *TLI) { +inline CallInst *extractCallocCall(Value *I, const TargetLibraryInfo *TLI) { return const_cast<CallInst*>(extractCallocCall((const Value*)I, TLI)); } @@ -119,11 +138,10 @@ static inline CallInst *extractCallocCall(Value *I, /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI); -static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { +inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { return const_cast<CallInst*>(isFreeCall((const Value*)I, TLI)); } - //===----------------------------------------------------------------------===// // Utility functions to compute size of objects. // @@ -169,13 +187,12 @@ ConstantInt *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const TargetLibraryInfo *TLI, bool MustSucceed); -typedef std::pair<APInt, APInt> SizeOffsetType; +using SizeOffsetType = std::pair<APInt, APInt>; /// \brief Evaluate the size and offset of an object pointed to by a Value* /// statically. Fails if size or offset are not known at compile time. 
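A minimal sketch of the static size/offset query this class provides; Ptr, DL, TLI, and Ctx are assumed to be supplied by the caller (for example, a function pass holding its analyses):

#include "llvm/Analysis/MemoryBuiltins.h"

ObjectSizeOffsetVisitor Visitor(DL, TLI, Ctx);
SizeOffsetType SizeOffset = Visitor.compute(Ptr);
if (Visitor.bothKnown(SizeOffset)) {
  const APInt &Size = SizeOffset.first;    // object size in bytes
  const APInt &Offset = SizeOffset.second; // byte offset of Ptr within it
}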
class ObjectSizeOffsetVisitor : public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetType> { - const DataLayout &DL; const TargetLibraryInfo *TLI; ObjectSizeOpts Options; @@ -229,18 +246,16 @@ private: bool CheckedZextOrTrunc(APInt &I); }; -typedef std::pair<Value*, Value*> SizeOffsetEvalType; - +using SizeOffsetEvalType = std::pair<Value *, Value *>; /// \brief Evaluate the size and offset of an object pointed to by a Value*. /// May create code to compute the result at run-time. class ObjectSizeOffsetEvaluator : public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> { - - typedef IRBuilder<TargetFolder> BuilderTy; - typedef std::pair<WeakTrackingVH, WeakTrackingVH> WeakEvalType; - typedef DenseMap<const Value*, WeakEvalType> CacheMapTy; - typedef SmallPtrSet<const Value*, 8> PtrSetTy; + using BuilderTy = IRBuilder<TargetFolder>; + using WeakEvalType = std::pair<WeakTrackingVH, WeakTrackingVH>; + using CacheMapTy = DenseMap<const Value *, WeakEvalType>; + using PtrSetTy = SmallPtrSet<const Value *, 8>; const DataLayout &DL; const TargetLibraryInfo *TLI; @@ -255,11 +270,13 @@ class ObjectSizeOffsetEvaluator SizeOffsetEvalType unknown() { return std::make_pair(nullptr, nullptr); } + SizeOffsetEvalType compute_(Value *V); public: ObjectSizeOffsetEvaluator(const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context, bool RoundToAlign = false); + SizeOffsetEvalType compute(Value *V); bool knownSize(SizeOffsetEvalType SizeOffset) { @@ -291,6 +308,6 @@ public: SizeOffsetEvalType visitInstruction(Instruction &I); }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_MEMORYBUILTINS_H diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 1dbbf6cc6add..c2974525a6ff 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -1,4 +1,4 @@ -//===- llvm/Analysis/MemoryDependenceAnalysis.h - Memory Deps --*- C++ -*-===// +//===- llvm/Analysis/MemoryDependenceAnalysis.h - Memory Deps ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,26 +15,35 @@ #define LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerEmbeddedInt.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PredIteratorCache.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +#include <cstdint> +#include <utility> +#include <vector> namespace llvm { -class Function; -class FunctionPass; -class Instruction; -class CallSite; + class AssumptionCache; -class MemoryDependenceResults; -class PredIteratorCache; +class CallSite; class DominatorTree; +class Function; +class Instruction; +class LoadInst; class PHITransAddr; +class TargetLibraryInfo; +class Value; /// A memory dependence query can return one of three different answers. 
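A sketch of consuming those three answers; MD is assumed to be a MemoryDependenceResults reference and QueryInst a memory instruction:

MemDepResult Dep = MD.getDependency(QueryInst);
if (Dep.isClobber()) {
  // Dep.getInst() may write the queried location without exactly defining it.
} else if (Dep.isDef()) {
  // Dep.getInst() exactly defines the queried location.
} else if (Dep.isNonLocal()) {
  // The answer lies outside the block; use the non-local query interface.
} // otherwise Dep.isUnknown()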
class MemDepResult { @@ -105,17 +114,17 @@ class MemDepResult { Unknown }; - typedef PointerSumType< + using ValueTy = PointerSumType< DepType, PointerSumTypeMember<Invalid, Instruction *>, PointerSumTypeMember<Clobber, Instruction *>, PointerSumTypeMember<Def, Instruction *>, - PointerSumTypeMember<Other, PointerEmbeddedInt<OtherType, 3>>> - ValueTy; + PointerSumTypeMember<Other, PointerEmbeddedInt<OtherType, 3>>>; ValueTy Value; + explicit MemDepResult(ValueTy V) : Value(V) {} public: - MemDepResult() : Value() {} + MemDepResult() = default; /// get methods: These are static ctor methods for creating various /// MemDepResult kinds. @@ -266,23 +275,23 @@ public: /// internal caching mechanism. class MemoryDependenceResults { // A map from instructions to their dependency. - typedef DenseMap<Instruction *, MemDepResult> LocalDepMapType; + using LocalDepMapType = DenseMap<Instruction *, MemDepResult>; LocalDepMapType LocalDeps; public: - typedef std::vector<NonLocalDepEntry> NonLocalDepInfo; + using NonLocalDepInfo = std::vector<NonLocalDepEntry>; private: /// A pair<Value*, bool> where the bool is true if the dependence is a read /// only dependence, false if read/write. - typedef PointerIntPair<const Value *, 1, bool> ValueIsLoadPair; + using ValueIsLoadPair = PointerIntPair<const Value *, 1, bool>; /// This pair is used when caching information for a block. /// /// If the pointer is null, the cache value is not a full query that starts /// at the specified block. If non-null, the bool indicates whether or not /// the contents of the block was skipped. - typedef PointerIntPair<BasicBlock *, 1, bool> BBSkipFirstBlockPair; + using BBSkipFirstBlockPair = PointerIntPair<BasicBlock *, 1, bool>; /// This record is the information kept for each (value, is load) pair. struct NonLocalPointerInfo { @@ -293,31 +302,32 @@ private: /// The maximum size of the dereferences of the pointer. /// /// May be UnknownSize if the sizes are unknown. - uint64_t Size; + uint64_t Size = MemoryLocation::UnknownSize; /// The AA tags associated with dereferences of the pointer. /// /// The members may be null if there are no tags or conflicting tags. AAMDNodes AATags; - NonLocalPointerInfo() : Size(MemoryLocation::UnknownSize) {} + NonLocalPointerInfo() = default; }; /// Cache storing single nonlocal def for the instruction. /// It is set when nonlocal def would be found in function returning only /// local dependencies. DenseMap<Instruction *, NonLocalDepResult> NonLocalDefsCache; + /// This map stores the cached results of doing a pointer lookup at the /// bottom of a block. /// /// The key of this map is the pointer+isload bit, the value is a list of /// <bb->result> mappings. - typedef DenseMap<ValueIsLoadPair, NonLocalPointerInfo> - CachedNonLocalPointerInfo; + using CachedNonLocalPointerInfo = + DenseMap<ValueIsLoadPair, NonLocalPointerInfo>; CachedNonLocalPointerInfo NonLocalPointerDeps; // A map from instructions to their non-local pointer dependencies. - typedef DenseMap<Instruction *, SmallPtrSet<ValueIsLoadPair, 4>> - ReverseNonLocalPtrDepTy; + using ReverseNonLocalPtrDepTy = + DenseMap<Instruction *, SmallPtrSet<ValueIsLoadPair, 4>>; ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps; /// This is the instruction we keep for each cached access that we have for @@ -325,17 +335,17 @@ private: /// /// The pointer is an owning pointer and the bool indicates whether we have /// any dirty bits in the set. 
- typedef std::pair<NonLocalDepInfo, bool> PerInstNLInfo; + using PerInstNLInfo = std::pair<NonLocalDepInfo, bool>; // A map from instructions to their non-local dependencies. - typedef DenseMap<Instruction *, PerInstNLInfo> NonLocalDepMapType; + using NonLocalDepMapType = DenseMap<Instruction *, PerInstNLInfo>; NonLocalDepMapType NonLocalDeps; // A reverse mapping from dependencies to the dependees. This is // used when removing instructions to keep the cache coherent. - typedef DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> - ReverseDepMapType; + using ReverseDepMapType = + DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>>; ReverseDepMapType ReverseLocalDeps; // A reverse mapping from dependencies to the non-local dependees. @@ -493,10 +503,11 @@ private: class MemoryDependenceAnalysis : public AnalysisInfoMixin<MemoryDependenceAnalysis> { friend AnalysisInfoMixin<MemoryDependenceAnalysis>; + static AnalysisKey Key; public: - typedef MemoryDependenceResults Result; + using Result = MemoryDependenceResults; MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM); }; @@ -505,10 +516,12 @@ public: /// MemoryDepnedenceResults instance. class MemoryDependenceWrapperPass : public FunctionPass { Optional<MemoryDependenceResults> MemDep; + public: + static char ID; + MemoryDependenceWrapperPass(); ~MemoryDependenceWrapperPass() override; - static char ID; /// Pass Implementation stuff. This doesn't do any analysis eagerly. bool runOnFunction(Function &) override; @@ -522,6 +535,6 @@ public: MemoryDependenceResults &getMemDep() { return *MemDep; } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H diff --git a/include/llvm/Analysis/MemoryLocation.h b/include/llvm/Analysis/MemoryLocation.h index f2cb2a123f2e..c1080742e83a 100644 --- a/include/llvm/Analysis/MemoryLocation.h +++ b/include/llvm/Analysis/MemoryLocation.h @@ -16,6 +16,7 @@ #ifndef LLVM_ANALYSIS_MEMORYLOCATION_H #define LLVM_ANALYSIS_MEMORYLOCATION_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Metadata.h" @@ -68,17 +69,23 @@ public: static MemoryLocation get(const AtomicCmpXchgInst *CXI); static MemoryLocation get(const AtomicRMWInst *RMWI); static MemoryLocation get(const Instruction *Inst) { - if (auto *I = dyn_cast<LoadInst>(Inst)) - return get(I); - else if (auto *I = dyn_cast<StoreInst>(Inst)) - return get(I); - else if (auto *I = dyn_cast<VAArgInst>(Inst)) - return get(I); - else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst)) - return get(I); - else if (auto *I = dyn_cast<AtomicRMWInst>(Inst)) - return get(I); - llvm_unreachable("unsupported memory instruction"); + return *MemoryLocation::getOrNone(Inst); + } + static Optional<MemoryLocation> getOrNone(const Instruction *Inst) { + switch (Inst->getOpcode()) { + case Instruction::Load: + return get(cast<LoadInst>(Inst)); + case Instruction::Store: + return get(cast<StoreInst>(Inst)); + case Instruction::VAArg: + return get(cast<VAArgInst>(Inst)); + case Instruction::AtomicCmpXchg: + return get(cast<AtomicCmpXchgInst>(Inst)); + case Instruction::AtomicRMW: + return get(cast<AtomicRMWInst>(Inst)); + default: + return None; + } } /// Return a location representing the source of a memory transfer. diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h index 5cec2bfb0cfb..d19f08453ee6 100644 --- a/include/llvm/Analysis/MemorySSA.h +++ b/include/llvm/Analysis/MemorySSA.h @@ -6,7 +6,7 @@ // License. 
See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// +// /// \file /// \brief This file exposes an interface to building/using memory SSA to /// walk memory instructions using a use/def graph. @@ -67,6 +67,7 @@ /// MemoryDefs are not disambiguated because it would require multiple reaching /// definitions, which would require multiple phis, and multiple memoryaccesses /// per instruction. +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_MEMORYSSA_H @@ -80,6 +81,7 @@ #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/simple_ilist.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/PHITransAddr.h" @@ -87,14 +89,12 @@ #include "llvm/IR/DerivedUser.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" -#include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -107,12 +107,16 @@ namespace llvm { class Function; class Instruction; class MemoryAccess; +class MemorySSAWalker; class LLVMContext; class raw_ostream; + namespace MSSAHelpers { + struct AllAccessTag {}; struct DefsOnlyTag {}; -} + +} // end namespace MSSAHelpers enum { // Used to signify what the default invalid ID is for MemoryAccess's @@ -137,6 +141,11 @@ public: using DefsOnlyType = ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>; + MemoryAccess(const MemoryAccess &) = delete; + MemoryAccess &operator=(const MemoryAccess &) = delete; + + void *operator new(size_t) = delete; + // Methods for support type inquiry through isa, cast, and // dyn_cast static bool classof(const Value *V) { @@ -144,19 +153,14 @@ public: return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal; } - MemoryAccess(const MemoryAccess &) = delete; - MemoryAccess &operator=(const MemoryAccess &) = delete; - - void *operator new(size_t) = delete; - BasicBlock *getBlock() const { return Block; } void print(raw_ostream &OS) const; void dump() const; /// \brief The user iterators for a memory access - typedef user_iterator iterator; - typedef const_user_iterator const_iterator; + using iterator = user_iterator; + using const_iterator = const_user_iterator; /// \brief This iterator walks over all of the defs in a given /// MemoryAccess. For MemoryPhi nodes, this walks arguments. For @@ -194,11 +198,11 @@ public: } protected: - friend class MemorySSA; - friend class MemoryUseOrDef; - friend class MemoryUse; friend class MemoryDef; friend class MemoryPhi; + friend class MemorySSA; + friend class MemoryUse; + friend class MemoryUseOrDef; /// \brief Used by MemorySSA to change the block of a MemoryAccess when it is /// moved. 
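Ahead of the detailed class definitions, a minimal sketch of a typical walker query; MSSA is assumed to come from MemorySSAWrapperPass/MemorySSAAnalysis and I to be a load or store:

if (MemoryAccess *MA = MSSA.getMemoryAccess(I)) {
  MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(MA);
  if (MSSA.isLiveOnEntryDef(Clobber)) {
    // Nothing in this function clobbers I's memory location before I.
  }
}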
@@ -259,11 +263,13 @@ public: protected: friend class MemorySSA; friend class MemorySSAUpdater; + MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty, DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB) : MemoryAccess(C, Vty, DeleteValue, BB, 1), MemoryInst(MI) { setDefiningAccess(DMA); } + void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false) { if (!Optimized) { setOperand(0, DMA); @@ -291,8 +297,7 @@ public: DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB) - : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB), - OptimizedID(0) {} + : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB) {} // allocate space for exactly one operand void *operator new(size_t s) { return User::operator new(s, 1); } @@ -315,6 +320,7 @@ public: MemoryAccess *getOptimized() const { return getDefiningAccess(); } + void resetOptimized() { OptimizedID = INVALID_MEMORYACCESS_ID; } @@ -325,7 +331,7 @@ protected: private: static void deleteMe(DerivedUser *Self); - unsigned int OptimizedID; + unsigned int OptimizedID = 0; }; template <> @@ -343,12 +349,13 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUse, MemoryAccess) /// MemoryDef/MemoryPhi. class MemoryDef final : public MemoryUseOrDef { public: + friend class MemorySSA; + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB, unsigned Ver) - : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB), - ID(Ver), Optimized(nullptr), OptimizedID(INVALID_MEMORYACCESS_ID) {} + : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB), ID(Ver) {} // allocate space for exactly one operand void *operator new(size_t s) { return User::operator new(s, 1); } @@ -361,27 +368,28 @@ public: Optimized = MA; OptimizedID = getDefiningAccess()->getID(); } + MemoryAccess *getOptimized() const { return Optimized; } + bool isOptimized() const { return getOptimized() && getDefiningAccess() && OptimizedID == getDefiningAccess()->getID(); } + void resetOptimized() { OptimizedID = INVALID_MEMORYACCESS_ID; } void print(raw_ostream &OS) const; - friend class MemorySSA; - unsigned getID() const { return ID; } private: static void deleteMe(DerivedUser *Self); const unsigned ID; - MemoryAccess *Optimized; - unsigned int OptimizedID; + MemoryAccess *Optimized = nullptr; + unsigned int OptimizedID = INVALID_MEMORYACCESS_ID; }; template <> @@ -436,8 +444,8 @@ public: // Block iterator interface. This provides access to the list of incoming // basic blocks, which parallels the list of incoming values. - typedef BasicBlock **block_iterator; - typedef BasicBlock *const *const_block_iterator; + using block_iterator = BasicBlock **; + using const_block_iterator = BasicBlock *const *; block_iterator block_begin() { auto *Ref = reinterpret_cast<Use::UserRef *>(op_begin() + ReservedSpace); @@ -477,6 +485,7 @@ public: assert(V && "PHI node got a null value!"); setOperand(I, V); } + static unsigned getOperandNumForIncomingValue(unsigned I) { return I; } static unsigned getIncomingValueNumForOperand(unsigned I) { return I; } @@ -595,12 +604,9 @@ inline void MemoryUseOrDef::resetOptimized() { cast<MemoryUse>(this)->resetOptimized(); } - template <> struct OperandTraits<MemoryPhi> : public HungoffOperandTraits<2> {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess) -class MemorySSAWalker; - /// \brief Encapsulates MemorySSA, including all data associated with memory /// accesses. 
class MemorySSA { @@ -707,11 +713,13 @@ protected: void moveTo(MemoryUseOrDef *What, BasicBlock *BB, AccessList::iterator Where); void moveTo(MemoryUseOrDef *What, BasicBlock *BB, InsertionPlace Point); + // Rename the dominator tree branch rooted at BB. void renamePass(BasicBlock *BB, MemoryAccess *IncomingVal, SmallPtrSetImpl<BasicBlock *> &Visited) { renamePass(DT->getNode(BB), IncomingVal, Visited, true, true); } + void removeFromLookups(MemoryAccess *); void removeFromLists(MemoryAccess *, bool ShouldDelete = true); void insertIntoListsForBlock(MemoryAccess *, const BasicBlock *, @@ -729,6 +737,7 @@ private: void optimizeUses(); void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const; + using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>; using DefsMap = DenseMap<const BasicBlock *, std::unique_ptr<DefsList>>; @@ -755,6 +764,7 @@ private: // Memory SSA mappings DenseMap<const Value *, MemoryAccess *> ValueToMemoryAccess; + // These two mappings contain the main block to access/def mappings for // MemorySSA. The list contained in PerBlockAccesses really owns all the // MemoryAccesses. @@ -779,8 +789,9 @@ private: // Internal MemorySSA utils, for use by MemorySSA classes and walkers class MemorySSAUtil { protected: - friend class MemorySSAWalker; friend class GVNHoist; + friend class MemorySSAWalker; + // This function should not be used by new passes. static bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, AliasAnalysis &AA); @@ -811,6 +822,7 @@ public: // unique_ptr<MemorySSA> to avoid build breakage on MSVC. struct Result { Result(std::unique_ptr<MemorySSA> &&MSSA) : MSSA(std::move(MSSA)) {} + MemorySSA &getMSSA() { return *MSSA.get(); } std::unique_ptr<MemorySSA> MSSA; @@ -978,6 +990,7 @@ public: assert(MP && "Tried to get phi arg block when not iterating over a PHI"); return MP->getIncomingBlock(ArgNo); } + typename BaseT::iterator::pointer operator*() const { assert(Access && "Tried to access past the end of our iterator"); // Go to the first argument for phis, and the defining access for everything @@ -986,6 +999,7 @@ public: return MP->getIncomingValue(ArgNo); return cast<MemoryUseOrDef>(Access)->getDefiningAccess(); } + using BaseT::operator++; memoryaccess_def_iterator &operator++() { assert(Access && "Hit end of iterator"); diff --git a/include/llvm/Analysis/ModuleSummaryAnalysis.h b/include/llvm/Analysis/ModuleSummaryAnalysis.h index 4f77170d9f68..9af7859cb4bf 100644 --- a/include/llvm/Analysis/ModuleSummaryAnalysis.h +++ b/include/llvm/Analysis/ModuleSummaryAnalysis.h @@ -14,13 +14,17 @@ #ifndef LLVM_ANALYSIS_MODULESUMMARYANALYSIS_H #define LLVM_ANALYSIS_MODULESUMMARYANALYSIS_H -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Optional.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" +#include <functional> namespace llvm { + class BlockFrequencyInfo; +class Function; +class Module; class ProfileSummaryInfo; /// Direct function to compute a \c ModuleSummaryIndex from a given module. @@ -38,10 +42,11 @@ ModuleSummaryIndex buildModuleSummaryIndex( class ModuleSummaryIndexAnalysis : public AnalysisInfoMixin<ModuleSummaryIndexAnalysis> { friend AnalysisInfoMixin<ModuleSummaryIndexAnalysis>; + static AnalysisKey Key; public: - typedef ModuleSummaryIndex Result; + using Result = ModuleSummaryIndex; Result run(Module &M, ModuleAnalysisManager &AM); }; @@ -70,6 +75,7 @@ public: // object for the module, to be written to bitcode or LLVM assembly. 
// ModulePass *createModuleSummaryIndexWrapperPass(); -} -#endif +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MODULESUMMARYANALYSIS_H diff --git a/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/include/llvm/Analysis/OptimizationRemarkEmitter.h index 64dd0737a112..26f32acdcda5 100644 --- a/include/llvm/Analysis/OptimizationDiagnosticInfo.h +++ b/include/llvm/Analysis/OptimizationRemarkEmitter.h @@ -1,4 +1,4 @@ -//===- OptimizationDiagnosticInfo.h - Optimization Diagnostic ---*- C++ -*-===// +//===- OptimizationRemarkEmitter.h - Optimization Diagnostic ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,7 +24,6 @@ namespace llvm { class DebugLoc; -class LLVMContext; class Loop; class Pass; class Twine; @@ -69,11 +68,23 @@ public: /// \brief Output the remark via the diagnostic handler and to the /// optimization record file. - /// - /// This is the new interface that should be now used rather than the legacy - /// emit* APIs. void emit(DiagnosticInfoOptimizationBase &OptDiag); + /// \brief Take a lambda that returns a remark which will be emitted. Second + /// argument is only used to restrict this to functions. + template <typename T> + void emit(T RemarkBuilder, decltype(RemarkBuilder()) * = nullptr) { + // Avoid building the remark unless we know there are at least *some* + // remarks enabled. We can't currently check whether remarks are requested + // for the calling pass since that requires actually building the remark. + + if (F->getContext().getDiagnosticsOutputFile() || + F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) { + auto R = RemarkBuilder(); + emit((DiagnosticInfoOptimizationBase &)R); + } + } + /// \brief Whether we allow for extra compile-time budget to perform more /// analysis to produce fewer false positives. /// @@ -81,10 +92,9 @@ public: /// use the extra analysis (1) to filter trivial false positives or (2) to /// provide more context so that non-trivial false positives can be quickly /// detected by the user. - bool allowExtraAnalysis() const { - // For now, only allow this with -fsave-optimization-record since the -Rpass - // options are handled in the front-end. - return F->getContext().getDiagnosticsOutputFile(); + bool allowExtraAnalysis(StringRef PassName) const { + return (F->getContext().getDiagnosticsOutputFile() || + F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(PassName)); } private: @@ -154,11 +164,5 @@ public: /// \brief Run the analysis pass over a function and produce BFI. 
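A usage sketch for the lambda-based emit() overload above: the remark object is only constructed when some remark output is actually enabled, which keeps the common path cheap. DEBUG_TYPE, ORE, and the loop L are assumed to come from the enclosing pass:

ORE.emit([&]() {
  return OptimizationRemarkMissed(DEBUG_TYPE, "NotTransformed",
                                  L->getStartLoc(), L->getHeader())
         << "the candidate loop was not transformed";
});

if (ORE.allowExtraAnalysis(DEBUG_TYPE)) {
  // Spend the extra compile-time budget on a more precise explanation.
}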
Result run(Function &F, FunctionAnalysisManager &AM); }; - -namespace yaml { -template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> { - static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag); -}; -} } #endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h index 17f2e8eaf4a2..381e65539c4e 100644 --- a/include/llvm/Analysis/PostDominators.h +++ b/include/llvm/Analysis/PostDominators.h @@ -1,4 +1,4 @@ -//=- llvm/Analysis/PostDominators.h - Post Dominator Calculation-*- C++ -*-===// +//=- llvm/Analysis/PostDominators.h - Post Dominator Calculation --*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -14,16 +14,20 @@ #ifndef LLVM_ANALYSIS_POSTDOMINATORS_H #define LLVM_ANALYSIS_POSTDOMINATORS_H +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { +class Function; +class raw_ostream; + /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used to /// compute the post-dominator tree. -/// struct PostDominatorTree : public PostDomTreeBase<BasicBlock> { - typedef PostDomTreeBase<BasicBlock> Base; + using Base = PostDomTreeBase<BasicBlock>; /// Handle invalidation explicitly. bool invalidate(Function &F, const PreservedAnalyses &PA, @@ -34,11 +38,12 @@ struct PostDominatorTree : public PostDomTreeBase<BasicBlock> { class PostDominatorTreeAnalysis : public AnalysisInfoMixin<PostDominatorTreeAnalysis> { friend AnalysisInfoMixin<PostDominatorTreeAnalysis>; + static AnalysisKey Key; public: - /// \brief Provide the result typedef for this analysis pass. - typedef PostDominatorTree Result; + /// \brief Provide the result type for this analysis pass. + using Result = PostDominatorTree; /// \brief Run the analysis pass over a function and produce a post dominator /// tree. @@ -52,11 +57,13 @@ class PostDominatorTreePrinterPass public: explicit PostDominatorTreePrinterPass(raw_ostream &OS); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; struct PostDominatorTreeWrapperPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid + PostDominatorTree DT; PostDominatorTreeWrapperPass() : FunctionPass(ID) { @@ -99,6 +106,6 @@ template <> struct GraphTraits<PostDominatorTree*> } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_POSTDOMINATORS_H diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h index 6aaabe1d1889..bd7b00374821 100644 --- a/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/include/llvm/Analysis/ProfileSummaryInfo.h @@ -49,6 +49,10 @@ private: void computeThresholds(); // Count thresholds to answer isHotCount and isColdCount queries. Optional<uint64_t> HotCountThreshold, ColdCountThreshold; + // True if the working set size of the code is considered huge, + // because the number of profile counts required to reach the hot + // percentile is above a huge threshold. + Optional<bool> HasHugeWorkingSetSize; public: ProfileSummaryInfo(Module &M) : M(M) {} @@ -84,6 +88,8 @@ public: /// Returns the profile count for \p CallInst. Optional<uint64_t> getProfileCount(const Instruction *CallInst, BlockFrequencyInfo *BFI); + /// Returns true if the working set size of the code is considered huge. + bool hasHugeWorkingSetSize(); /// \brief Returns true if \p F has hot function entry. 
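A sketch of gating a transformation on the profile heuristics above, including the new working-set query; PSI is assumed to come from ProfileSummaryInfoWrapperPass and F to be the current function:

if (PSI.hasProfileSummary() && PSI.isFunctionEntryHot(&F) &&
    !PSI.hasHugeWorkingSetSize()) {
  // Be more aggressive: the function is hot and the profile suggests the
  // overall working set is small enough for size growth to pay off.
}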
bool isFunctionEntryHot(const Function *F); /// Returns true if \p F has hot function entry or hot call edge. @@ -104,6 +110,14 @@ public: bool isHotCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); /// \brief Returns true if Callsite \p CS is considered cold. bool isColdCallSite(const CallSite &CS, BlockFrequencyInfo *BFI); + /// \brief Returns HotCountThreshold if set. + uint64_t getHotCountThreshold() { + return HotCountThreshold ? HotCountThreshold.getValue() : 0; + } + /// \brief Returns ColdCountThreshold if set. + uint64_t getColdCountThreshold() { + return ColdCountThreshold ? ColdCountThreshold.getValue() : 0; + } }; /// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo. diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h index 2fe7c6725266..9f156a1a6029 100644 --- a/include/llvm/Analysis/PtrUseVisitor.h +++ b/include/llvm/Analysis/PtrUseVisitor.h @@ -6,6 +6,7 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file provides a collection of visitors which walk the (instruction) /// uses of a pointer. These visitors all provide the same essential behavior @@ -16,23 +17,36 @@ /// global variable, or function argument. /// /// FIXME: Provide a variant which doesn't track offsets and is cheaper. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_PTRUSEVISITOR_H #define LLVM_ANALYSIS_PTRUSEVISITOR_H #include "llvm/ADT/APInt.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/Compiler.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> +#include <type_traits> namespace llvm { namespace detail { + /// \brief Implementation of non-dependent functionality for \c PtrUseVisitor. /// /// See \c PtrUseVisitor for the public interface and detailed comments about @@ -115,7 +129,8 @@ protected: /// This is used to maintain a worklist fo to-visit uses. This is used to /// make the visit be iterative rather than recursive. struct UseToVisit { - typedef PointerIntPair<Use *, 1, bool> UseAndIsOffsetKnownPair; + using UseAndIsOffsetKnownPair = PointerIntPair<Use *, 1, bool>; + UseAndIsOffsetKnownPair UseAndIsOffsetKnown; APInt Offset; }; @@ -128,7 +143,6 @@ protected: /// @} - /// \name Per-visit state /// This state is reset for each instruction visited. /// @{ @@ -145,7 +159,6 @@ protected: /// @} - /// Note that the constructor is protected because this class must be a base /// class, we can't create instances directly of this class. PtrUseVisitorBase(const DataLayout &DL) : DL(DL) {} @@ -162,6 +175,7 @@ protected: /// offsets and looking through GEPs. bool adjustOffsetForGEP(GetElementPtrInst &GEPI); }; + } // end namespace detail /// \brief A base class for visitors over the uses of a pointer value. 
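A hedged sketch of a derived visitor in the style this base class is designed for: it flags when the visited pointer itself is stored to memory. All names here are illustrative assumptions, not part of the patch; U and PI are the protected per-visit members of PtrUseVisitorBase:

struct StoredPtrFinder : public PtrUseVisitor<StoredPtrFinder> {
  StoredPtrFinder(const DataLayout &DL) : PtrUseVisitor<StoredPtrFinder>(DL) {}
  void visitStoreInst(StoreInst &SI) {
    if (U->get() == SI.getValueOperand())
      PI.setEscaped(&SI); // storing the pointer itself lets it escape
  }
};

// Usage: walk all uses of an alloca, tracking offsets through GEPs.
StoredPtrFinder Finder(DL);
auto Info = Finder.visitPtr(*AI); // AI is assumed to be an AllocaInst *
if (Info.isEscaped()) {
  // conservative bail-out
}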
@@ -193,7 +207,8 @@ template <typename DerivedT> class PtrUseVisitor : protected InstVisitor<DerivedT>, public detail::PtrUseVisitorBase { friend class InstVisitor<DerivedT>; - typedef InstVisitor<DerivedT> Base; + + using Base = InstVisitor<DerivedT>; public: PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) { @@ -283,6 +298,6 @@ protected: } }; -} +} // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_PTRUSEVISITOR_H diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index 2e34928b28ad..719622359949 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -254,7 +254,7 @@ public: template <class Tr> class RegionBase : public RegionNodeBase<Tr> { friend class RegionInfoBase<Tr>; - + using FuncT = typename Tr::FuncT; using BlockT = typename Tr::BlockT; using RegionInfoT = typename Tr::RegionInfoT; @@ -407,6 +407,11 @@ public: /// else NULL. BlockT *getExitingBlock() const; + /// @brief Collect all blocks of this region's single exit edge, if existing. + /// + /// @return True if this region contains all the predecessors of the exit. + bool getExitingBlocks(SmallVectorImpl<BlockT *> &Exitings) const; + /// @brief Is this a simple region? /// /// A region is simple if it has exactly one exit and one entry edge. diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h index cd4ec0a03a9e..6e522354dd9b 100644 --- a/include/llvm/Analysis/RegionInfoImpl.h +++ b/include/llvm/Analysis/RegionInfoImpl.h @@ -178,6 +178,29 @@ typename RegionBase<Tr>::BlockT *RegionBase<Tr>::getEnteringBlock() const { } template <class Tr> +bool RegionBase<Tr>::getExitingBlocks( + SmallVectorImpl<BlockT *> &Exitings) const { + bool CoverAll = true; + + if (!exit) + return CoverAll; + + for (PredIterTy PI = InvBlockTraits::child_begin(exit), + PE = InvBlockTraits::child_end(exit); + PI != PE; ++PI) { + BlockT *Pred = *PI; + if (contains(Pred)) { + Exitings.push_back(Pred); + continue; + } + + CoverAll = false; + } + + return CoverAll; +} + +template <class Tr> typename RegionBase<Tr>::BlockT *RegionBase<Tr>::getExitingBlock() const { BlockT *exit = getExit(); BlockT *exitingBlock = nullptr; diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index d1b182755cf8..21b72f3e13c2 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -21,11 +21,21 @@ #ifndef LLVM_ANALYSIS_SCALAREVOLUTION_H #define LLVM_ANALYSIS_SCALAREVOLUTION_H -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" @@ -33,30 +43,33 @@ #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> +#include <utility> namespace llvm { -class APInt; + class AssumptionCache; +class 
BasicBlock; class Constant; class ConstantInt; -class DominatorTree; -class Type; -class ScalarEvolution; class DataLayout; -class TargetLibraryInfo; +class DominatorTree; +class GEPOperator; +class Instruction; class LLVMContext; -class Operator; -class SCEV; +class raw_ostream; +class ScalarEvolution; class SCEVAddRecExpr; -class SCEVConstant; -class SCEVExpander; -class SCEVPredicate; class SCEVUnknown; -class Function; - -template <> struct FoldingSetTrait<SCEV>; -template <> struct FoldingSetTrait<SCEVPredicate>; +class StructType; +class TargetLibraryInfo; +class Type; +class Value; /// This class represents an analyzed expression in the program. These are /// opaque objects that the client is not allowed to do much with directly. @@ -74,11 +87,7 @@ class SCEV : public FoldingSetNode { protected: /// This field is initialized to zero and may be used in subclasses to store /// miscellaneous information. - unsigned short SubclassData; - -private: - SCEV(const SCEV &) = delete; - void operator=(const SCEV &) = delete; + unsigned short SubclassData = 0; public: /// NoWrapFlags are bitfield indices into SubclassData. @@ -108,24 +117,22 @@ public: }; explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) - : FastID(ID), SCEVType(SCEVTy), SubclassData(0) {} + : FastID(ID), SCEVType(SCEVTy) {} + SCEV(const SCEV &) = delete; + SCEV &operator=(const SCEV &) = delete; unsigned getSCEVType() const { return SCEVType; } /// Return the LLVM type of this SCEV expression. - /// Type *getType() const; /// Return true if the expression is a constant zero. - /// bool isZero() const; /// Return true if the expression is a constant one. - /// bool isOne() const; /// Return true if the expression is a constant all-ones value. - /// bool isAllOnesValue() const; /// Return true if the specified scev is negated, but not a constant. @@ -136,7 +143,6 @@ public: void print(raw_ostream &OS) const; /// This method is used for debugging. - /// void dump() const; }; @@ -144,10 +150,12 @@ public: // temporary FoldingSetNodeID values. template <> struct FoldingSetTrait<SCEV> : DefaultFoldingSetTrait<SCEV> { static void Profile(const SCEV &X, FoldingSetNodeID &ID) { ID = X.FastID; } + static bool Equals(const SCEV &X, const FoldingSetNodeID &ID, unsigned IDHash, FoldingSetNodeID &TempID) { return ID == X.FastID; } + static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) { return X.FastID.ComputeHash(); } @@ -221,7 +229,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) { // temporary FoldingSetNodeID values. template <> struct FoldingSetTrait<SCEVPredicate> : DefaultFoldingSetTrait<SCEVPredicate> { - static void Profile(const SCEVPredicate &X, FoldingSetNodeID &ID) { ID = X.FastID; } @@ -230,6 +237,7 @@ struct FoldingSetTrait<SCEVPredicate> : DefaultFoldingSetTrait<SCEVPredicate> { unsigned IDHash, FoldingSetNodeID &TempID) { return ID == X.FastID; } + static unsigned ComputeHash(const SCEVPredicate &X, FoldingSetNodeID &TempID) { return X.FastID.ComputeHash(); @@ -351,6 +359,7 @@ public: /// Returns the set assumed no overflow flags. IncrementWrapFlags getFlags() const { return Flags; } + /// Implementation of the SCEVPredicate interface const SCEV *getExpr() const override; bool implies(const SCEVPredicate *N) const override; @@ -371,11 +380,12 @@ public: /// ScalarEvolution::Preds folding set. This is why the \c add function is sound. 
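Stepping back to the basic SCEV interface above, a small sketch of how client code typically inspects an analyzed expression; SE is assumed to be a ScalarEvolution reference and V a Value in scope:

if (SE.isSCEVable(V->getType())) {
  const SCEV *S = SE.getSCEV(V);
  S->print(errs()); // print()/dump() are the debugging aids declared above
  if (auto *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // An add recurrence {start,+,step}<loop>; the subclass is declared in
    // ScalarEvolutionExpressions.h.
    (void)AR->getLoop();
  }
}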
class SCEVUnionPredicate final : public SCEVPredicate { private: - typedef DenseMap<const SCEV *, SmallVector<const SCEVPredicate *, 4>> - PredicateMap; + using PredicateMap = + DenseMap<const SCEV *, SmallVector<const SCEVPredicate *, 4>>; /// Vector with references to all predicates in this union. SmallVector<const SCEVPredicate *, 16> Preds; + /// Maps SCEVs to predicates for quick look-ups. PredicateMap SCEVToPreds; @@ -409,6 +419,35 @@ public: } }; +struct ExitLimitQuery { + ExitLimitQuery(const Loop *L, BasicBlock *ExitingBlock, bool AllowPredicates) + : L(L), ExitingBlock(ExitingBlock), AllowPredicates(AllowPredicates) {} + + const Loop *L; + BasicBlock *ExitingBlock; + bool AllowPredicates; +}; + +template <> struct DenseMapInfo<ExitLimitQuery> { + static inline ExitLimitQuery getEmptyKey() { + return ExitLimitQuery(nullptr, nullptr, true); + } + + static inline ExitLimitQuery getTombstoneKey() { + return ExitLimitQuery(nullptr, nullptr, false); + } + + static unsigned getHashValue(ExitLimitQuery Val) { + return hash_combine(hash_combine(Val.L, Val.ExitingBlock), + Val.AllowPredicates); + } + + static bool isEqual(ExitLimitQuery LHS, ExitLimitQuery RHS) { + return LHS.L == RHS.L && LHS.ExitingBlock == RHS.ExitingBlock && + LHS.AllowPredicates == RHS.AllowPredicates; + } +}; + /// The main scalar evolution driver. Because client code (intentionally) /// can't do much with the SCEV objects directly, they must ask this class /// for services. @@ -443,11 +482,542 @@ public: return (SCEV::NoWrapFlags)(Flags & ~OffFlags); } + ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, + DominatorTree &DT, LoopInfo &LI); + ScalarEvolution(ScalarEvolution &&Arg); + ~ScalarEvolution(); + + LLVMContext &getContext() const { return F.getContext(); } + + /// Test if values of the given type are analyzable within the SCEV + /// framework. This primarily includes integer types, and it can optionally + /// include pointer types if the ScalarEvolution class has access to + /// target-specific information. + bool isSCEVable(Type *Ty) const; + + /// Return the size in bits of the specified type, for which isSCEVable must + /// return true. + uint64_t getTypeSizeInBits(Type *Ty) const; + + /// Return a type with the same bitwidth as the given type and which + /// represents how SCEV will treat the given type, for which isSCEVable must + /// return true. For pointer types, this is the pointer-sized integer type. + Type *getEffectiveSCEVType(Type *Ty) const; + + // Returns a wider type among {Ty1, Ty2}. + Type *getWiderType(Type *Ty1, Type *Ty2) const; + + /// Return true if the SCEV is a scAddRecExpr or it contains + /// scAddRecExpr. The result will be cached in HasRecMap. + bool containsAddRecurrence(const SCEV *S); + + /// Erase Value from ValueExprMap and ExprValueMap. + void eraseValueFromMap(Value *V); + + /// Return a SCEV expression for the full generality of the specified + /// expression. 
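The DenseMapInfo specialization above exists so that exit-limit queries can key a DenseMap-based cache. A hedged sketch with a hypothetical cache; L, ExitingBB, and SE are assumptions:

DenseMap<ExitLimitQuery, const SCEV *> ExitCountCache; // hypothetical cache
ExitLimitQuery Q(L, ExitingBB, /*AllowPredicates=*/false);
if (!ExitCountCache.count(Q))
  ExitCountCache[Q] = SE.getExitCount(L, ExitingBB);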
+ const SCEV *getSCEV(Value *V); + + const SCEV *getConstant(ConstantInt *V); + const SCEV *getConstant(const APInt &Val); + const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false); + const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty); + const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); + const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); + const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); + const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { + SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; + return getAddExpr(Ops, Flags, Depth); + } + const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { + SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2}; + return getAddExpr(Ops, Flags, Depth); + } + const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); + const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { + SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; + return getMulExpr(Ops, Flags, Depth); + } + const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { + SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2}; + return getMulExpr(Ops, Flags, Depth); + } + const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUDivExactExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getURemExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, + SCEV::NoWrapFlags Flags); + const SCEV *getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, + const Loop *L, SCEV::NoWrapFlags Flags); + const SCEV *getAddRecExpr(const SmallVectorImpl<const SCEV *> &Operands, + const Loop *L, SCEV::NoWrapFlags Flags) { + SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end()); + return getAddRecExpr(NewOp, L, Flags); + } + + /// Checks if \p SymbolicPHI can be rewritten as an AddRecExpr under some + /// Predicates. If successful return these <AddRecExpr, Predicates>; + /// The function is intended to be called from PSCEV (the caller will decide + /// whether to actually add the predicates and carry out the rewrites). + Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> + createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI); + + /// Returns an expression for a GEP + /// + /// \p GEP The GEP. The indices contained in the GEP itself are ignored, + /// instead we use IndexExprs. + /// \p IndexExprs The expressions for the indices. 
+ const SCEV *getGEPExpr(GEPOperator *GEP, + const SmallVectorImpl<const SCEV *> &IndexExprs); + const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands); + const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUMaxExpr(SmallVectorImpl<const SCEV *> &Operands); + const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUnknown(Value *V); + const SCEV *getCouldNotCompute(); + + /// Return a SCEV for the constant 0 of a specific type. + const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); } + + /// Return a SCEV for the constant 1 of a specific type. + const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); } + + /// Return an expression for sizeof AllocTy that is type IntTy + const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy); + + /// Return an expression for offsetof on the given field with type IntTy + const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo); + + /// Return the SCEV object corresponding to -V. + const SCEV *getNegativeSCEV(const SCEV *V, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); + + /// Return the SCEV object corresponding to ~V. + const SCEV *getNotSCEV(const SCEV *V); + + /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1. + const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. + const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is sign extended. + const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. The + /// conversion must not be narrowing. + const SCEV *getNoopOrZeroExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is sign extended. The + /// conversion must not be narrowing. + const SCEV *getNoopOrSignExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is extended with + /// unspecified bits. The conversion must not be narrowing. + const SCEV *getNoopOrAnyExtend(const SCEV *V, Type *Ty); + + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. The conversion must not be widening. + const SCEV *getTruncateOrNoop(const SCEV *V, Type *Ty); + + /// Promote the operands to the wider of the types using zero-extension, and + /// then perform a umax operation with them. + const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); + + /// Promote the operands to the wider of the types using zero-extension, and + /// then perform a umin operation with them. + const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); + + /// Transitively follow the chain of pointer-type operands until reaching a + /// SCEV that does not have a single pointer operand. 
This returns a + /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner + /// cases do exist. + const SCEV *getPointerBase(const SCEV *V); + + /// Return a SCEV expression for the specified value at the specified scope + /// in the program. The L value specifies a loop nest to evaluate the + /// expression at, where null is the top-level or a specified loop is + /// immediately inside of the loop. + /// + /// This method can be used to compute the exit value for a variable defined + /// in a loop by querying what the value will hold in the parent loop. + /// + /// In the case that a relevant loop exit value cannot be computed, the + /// original value V is returned. + const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L); + + /// This is a convenience function which does getSCEVAtScope(getSCEV(V), L). + const SCEV *getSCEVAtScope(Value *V, const Loop *L); + + /// Test whether entry to the loop is protected by a conditional between LHS + /// and RHS. This is used to help avoid max expressions in loop trip + /// counts, and to eliminate casts. + bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Test whether the backedge of the loop is protected by a conditional + /// between LHS and RHS. This is used to to eliminate casts. + bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Returns the maximum trip count of the loop if it is a single-exit + /// loop and we can compute a small maximum for that loop. + /// + /// Implemented in terms of the \c getSmallConstantTripCount overload with + /// the single exiting block passed to it. See that routine for details. + unsigned getSmallConstantTripCount(const Loop *L); + + /// Returns the maximum trip count of this loop as a normal unsigned + /// value. Returns 0 if the trip count is unknown or not constant. This + /// "trip count" assumes that control exits via ExitingBlock. More + /// precisely, it is the number of times that control may reach ExitingBlock + /// before taking the branch. For loops with multiple exits, it may not be + /// the number times that the loop header executes if the loop exits + /// prematurely via another branch. + unsigned getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock); + + /// Returns the upper bound of the loop trip count as a normal unsigned + /// value. + /// Returns 0 if the trip count is unknown or not constant. + unsigned getSmallConstantMaxTripCount(const Loop *L); + + /// Returns the largest constant divisor of the trip count of the + /// loop if it is a single-exit loop and we can compute a small maximum for + /// that loop. + /// + /// Implemented in terms of the \c getSmallConstantTripMultiple overload with + /// the single exiting block passed to it. See that routine for details. + unsigned getSmallConstantTripMultiple(const Loop *L); + + /// Returns the largest constant divisor of the trip count of this loop as a + /// normal unsigned value, if possible. This means that the actual trip + /// count is always a multiple of the returned value (don't forget the trip + /// count could very well be zero as well!). As explained in the comments + /// for getSmallConstantTripCount, this assumes that control exits the loop + /// via ExitingBlock. 
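A short sketch of the trip-count queries described above; SE and L as before:

if (unsigned TC = SE.getSmallConstantTripCount(L)) {
  // The loop provably executes exactly TC times (single exit, small constant).
}
unsigned Multiple = SE.getSmallConstantTripMultiple(L);
// Multiple always divides the trip count; it degrades to 1 when nothing
// better is known.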
+  /// Returns the largest constant divisor of the trip count of this loop as a
+  /// normal unsigned value, if possible. This means that the actual trip
+  /// count is always a multiple of the returned value (don't forget the trip
+  /// count could very well be zero as well!). As explained in the comments
+  /// for getSmallConstantTripCount, this assumes that control exits the loop
+  /// via ExitingBlock.
+  unsigned getSmallConstantTripMultiple(const Loop *L,
+                                        BasicBlock *ExitingBlock);
+
+  /// Get the expression for the number of loop iterations for which this loop
+  /// is guaranteed not to exit via ExitingBlock, or SCEVCouldNotCompute if
+  /// that cannot be determined.
+  const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock);
+
+  /// If the specified loop has a predictable backedge-taken count, return it,
+  /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count is
+  /// the number of times the loop header will be branched to from within the
+  /// loop, assuming there are no abnormal exits like exception throws. This is
+  /// one less than the trip count of the loop, since it doesn't count the first
+  /// iteration, when the header is branched to from outside the loop.
+  ///
+  /// Note that it is not valid to call this method on a loop without a
+  /// loop-invariant backedge-taken count (see
+  /// hasLoopInvariantBackedgeTakenCount).
+  const SCEV *getBackedgeTakenCount(const Loop *L);
+
+  /// Similar to getBackedgeTakenCount, except it will add a set of
+  /// SCEV predicates to Predicates that are required to be true in order for
+  /// the answer to be correct. Predicates can be checked with run-time
+  /// checks and can be used to perform loop versioning.
+  const SCEV *getPredicatedBackedgeTakenCount(const Loop *L,
+                                              SCEVUnionPredicate &Predicates);
+
+  /// When successful, this returns a SCEVConstant that is greater than or equal
+  /// to (i.e., a "conservative over-approximation" of) the value returned by
+  /// getBackedgeTakenCount. If such a value cannot be computed, it returns the
+  /// SCEVCouldNotCompute object.
+  const SCEV *getMaxBackedgeTakenCount(const Loop *L);
+
+  /// Return true if the backedge taken count is either the value returned by
+  /// getMaxBackedgeTakenCount or zero.
+  bool isBackedgeTakenCountMaxOrZero(const Loop *L);
+
+  /// Return true if the specified loop has an analyzable loop-invariant
+  /// backedge-taken count.
+  bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
+
+  /// This method should be called by the client when it has changed a loop in
+  /// a way that may affect ScalarEvolution's ability to compute a trip count,
+  /// or if the loop is deleted. This call is potentially expensive for large
+  /// loop bodies.
+  void forgetLoop(const Loop *L);
+
+  /// This method should be called by the client when it has changed a value
+  /// in a way that may affect its value, or which may disconnect it from a
+  /// def-use chain linking it to a loop.
+  void forgetValue(Value *V);
+
+  /// Called when the client has changed the disposition of values in
+  /// this loop.
+  ///
+  /// We don't have a way to invalidate per-loop dispositions. Clear and
+  /// recompute is simpler.
+  void forgetLoopDispositions(const Loop *L) { LoopDispositions.clear(); }
+
+  /// Determine the minimum number of zero bits that S is guaranteed to end in
+  /// (at every loop iteration). It is, at the same time, the minimum number
+  /// of times S is divisible by 2. For example, given {4,+,8} it returns 2.
+  /// If S is guaranteed to be 0, it returns the bitwidth of S.
+  uint32_t GetMinTrailingZeros(const SCEV *S);
+
+  /// Determine the unsigned range for a particular SCEV.
+  /// NOTE: This returns a copy of the reference returned by getRangeRef.
+  ConstantRange getUnsignedRange(const SCEV *S) {
+    return getRangeRef(S, HINT_RANGE_UNSIGNED);
+  }
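Because the backedge-taken count deliberately undercounts by one, clients wanting a trip count usually rebuild it; a sketch (getTripCount is an illustrative name; the one-bit widening is needed because the backedge-taken count may already be the maximum value of its type):

  static const SCEV *getTripCount(ScalarEvolution &SE, const Loop *L) {
    if (!SE.hasLoopInvariantBackedgeTakenCount(L))
      return SE.getCouldNotCompute();
    const SCEV *BTC = SE.getBackedgeTakenCount(L);
    // Widen by one bit before adding one so the trip count cannot wrap to 0.
    Type *WideTy = IntegerType::get(SE.getContext(),
                                    SE.getTypeSizeInBits(BTC->getType()) + 1);
    return SE.getAddExpr(SE.getNoopOrZeroExtend(BTC, WideTy),
                         SE.getOne(WideTy));
  }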
+  /// Determine the min of the unsigned range for a particular SCEV.
+  APInt getUnsignedRangeMin(const SCEV *S) {
+    return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMin();
+  }
+
+  /// Determine the max of the unsigned range for a particular SCEV.
+  APInt getUnsignedRangeMax(const SCEV *S) {
+    return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMax();
+  }
+
+  /// Determine the signed range for a particular SCEV.
+  /// NOTE: This returns a copy of the reference returned by getRangeRef.
+  ConstantRange getSignedRange(const SCEV *S) {
+    return getRangeRef(S, HINT_RANGE_SIGNED);
+  }
+
+  /// Determine the min of the signed range for a particular SCEV.
+  APInt getSignedRangeMin(const SCEV *S) {
+    return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMin();
+  }
+
+  /// Determine the max of the signed range for a particular SCEV.
+  APInt getSignedRangeMax(const SCEV *S) {
+    return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMax();
+  }
+
+  /// Test if the given expression is known to be negative.
+  bool isKnownNegative(const SCEV *S);
+
+  /// Test if the given expression is known to be positive.
+  bool isKnownPositive(const SCEV *S);
+
+  /// Test if the given expression is known to be non-negative.
+  bool isKnownNonNegative(const SCEV *S);
+
+  /// Test if the given expression is known to be non-positive.
+  bool isKnownNonPositive(const SCEV *S);
+
+  /// Test if the given expression is known to be non-zero.
+  bool isKnownNonZero(const SCEV *S);
+
+  /// Test if the given expression is known to satisfy the condition described
+  /// by Pred, LHS, and RHS.
+  bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+                        const SCEV *RHS);
+
+  /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
+  /// is monotonically increasing or decreasing. In the former case set
+  /// `Increasing` to true and in the latter case set `Increasing` to false.
+  ///
+  /// A predicate is said to be monotonically increasing if it may go from
+  /// being false to being true as the loop iterates, but never the other way
+  /// around. A predicate is said to be monotonically decreasing if it may go
+  /// from being true to being false as the loop iterates, but never the other
+  /// way around.
+  bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred,
+                            bool &Increasing);
+
+  /// Return true if the result of the predicate LHS `Pred` RHS is loop
+  /// invariant with respect to L. Set InvariantPred, InvariantLHS and
+  /// InvariantRHS so that InvariantLHS `InvariantPred` InvariantRHS is the
+  /// loop invariant form of LHS `Pred` RHS.
+  bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                const SCEV *RHS, const Loop *L,
+                                ICmpInst::Predicate &InvariantPred,
+                                const SCEV *&InvariantLHS,
+                                const SCEV *&InvariantRHS);
+
+  /// Simplify LHS and RHS in a comparison with predicate Pred. Return true
+  /// iff any changes were made. If the operands are provably equal or
+  /// unequal, LHS and RHS are set to the same value and Pred is set to either
+  /// ICMP_EQ or ICMP_NE.
+  bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS,
+                            const SCEV *&RHS, unsigned Depth = 0);
+
+  /// Return the "disposition" of the given SCEV with respect to the given
+  /// loop.
+  LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L);
+
+  /// Return true if the value of the given SCEV is unchanging in the
+  /// specified loop.
+  bool isLoopInvariant(const SCEV *S, const Loop *L);
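The range and predicate queries compose naturally; a sketch of a bounds check built from them (indexFitsIn and its two-step policy are illustrative):

  static bool indexFitsIn(ScalarEvolution &SE, Value *Idx, uint64_t Limit) {
    const SCEV *S = SE.getSCEV(Idx);
    // Try the cached unsigned range first, then fall back to the heavier
    // predicate reasoning.
    if (SE.getUnsignedRangeMax(S).ult(Limit))
      return true;
    return SE.isKnownPredicate(ICmpInst::ICMP_ULT, S,
                               SE.getConstant(S->getType(), Limit));
  }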
+  /// Determine if the SCEV can be evaluated at loop's entry. It is true if it
+  /// doesn't depend on a SCEVUnknown of an instruction which is dominated by
+  /// the header of loop L.
+  bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L);
+
+  /// Return true if the given SCEV changes value in a known way in the
+  /// specified loop. This property being true implies that the value is
+  /// variant in the loop AND that we can emit an expression to compute the
+  /// value of the expression at any particular loop iteration.
+  bool hasComputableLoopEvolution(const SCEV *S, const Loop *L);
+
+  /// Return the "disposition" of the given SCEV with respect to the given
+  /// block.
+  BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB);
+
+  /// Return true if elements that make up the given SCEV dominate the
+  /// specified basic block.
+  bool dominates(const SCEV *S, const BasicBlock *BB);
+
+  /// Return true if elements that make up the given SCEV properly dominate
+  /// the specified basic block.
+  bool properlyDominates(const SCEV *S, const BasicBlock *BB);
+
+  /// Test whether the given SCEV has Op as a direct or indirect operand.
+  bool hasOperand(const SCEV *S, const SCEV *Op) const;
+
+  /// Return the size of an element read or written by Inst.
+  const SCEV *getElementSize(Instruction *Inst);
+
+  /// Compute the array dimensions Sizes from the set of Terms extracted from
+  /// the memory access function of this SCEVAddRecExpr (second step of
+  /// delinearization).
+  void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
+                           SmallVectorImpl<const SCEV *> &Sizes,
+                           const SCEV *ElementSize);
+
+  void print(raw_ostream &OS) const;
+  void verify() const;
+  bool invalidate(Function &F, const PreservedAnalyses &PA,
+                  FunctionAnalysisManager::Invalidator &Inv);
+
+  /// Collect parametric terms occurring in step expressions (first step of
+  /// delinearization).
+  void collectParametricTerms(const SCEV *Expr,
+                              SmallVectorImpl<const SCEV *> &Terms);
+
+  /// Return in Subscripts the access functions for each dimension in Sizes
+  /// (third step of delinearization).
+  void computeAccessFunctions(const SCEV *Expr,
+                              SmallVectorImpl<const SCEV *> &Subscripts,
+                              SmallVectorImpl<const SCEV *> &Sizes);
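Driving the three steps by hand looks as follows; a sketch (delinearizeByHand is an illustrative name; the delinearize convenience wrapper documented next bundles the same pipeline):

  static void delinearizeByHand(ScalarEvolution &SE, Instruction *Inst,
                                const SCEV *Expr,
                                SmallVectorImpl<const SCEV *> &Subscripts,
                                SmallVectorImpl<const SCEV *> &Sizes) {
    SmallVector<const SCEV *, 4> Terms;
    SE.collectParametricTerms(Expr, Terms);                        // step 1
    SE.findArrayDimensions(Terms, Sizes, SE.getElementSize(Inst)); // step 2
    SE.computeAccessFunctions(Expr, Subscripts, Sizes);            // step 3
    // On success, Subscripts[i] indexes a dimension of size Sizes[i], with
    // the outermost dimension first.
  }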
+  /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
+  /// subscripts and sizes of an array access.
+  ///
+  /// The delinearization is a 3 step process: the first two steps compute the
+  /// sizes of each subscript and the third step computes the access functions
+  /// for the delinearized array:
+  ///
+  /// 1. Find the terms in the step functions
+  /// 2. Compute the array size
+  /// 3. Compute the access function: divide the SCEV by the array size
+  ///    starting with the innermost dimensions found in step 2. The Quotient
+  ///    is the SCEV to be divided in the next step of the recursion. The
+  ///    Remainder is the subscript of the innermost dimension. Loop over all
+  ///    array dimensions computed in step 2.
+  ///
+  /// To compute a uniform array size for several memory accesses to the same
+  /// object, one can collect in step 1 all the step terms for all the memory
+  /// accesses, and compute in step 2 a unique array shape. This guarantees
+  /// that the array shape will be the same across all memory accesses.
+  ///
+  /// FIXME: We could derive the result of steps 1 and 2 from a description of
+  /// the array shape given in metadata.
+  ///
+  /// Example:
+  ///
+  /// A[][n][m]
+  ///
+  /// for i
+  ///   for j
+  ///     for k
+  ///       A[j+k][2i][5i] =
+  ///
+  /// The initial SCEV:
+  ///
+  /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
+  ///
+  /// 1. Find the different terms in the step functions:
+  /// -> [2*m, 5, n*m, n*m]
+  ///
+  /// 2. Compute the array size: sort and unique them
+  /// -> [n*m, 2*m, 5]
+  /// find the GCD of all the terms = 1
+  /// divide by the GCD and erase constant terms
+  /// -> [n*m, 2*m]
+  /// GCD = m
+  /// divide by GCD -> [n, 2]
+  /// remove constant terms
+  /// -> [n]
+  /// size of the array is A[unknown][n][m]
+  ///
+  /// 3. Compute the access function
+  /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
+  /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
+  /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
+  /// The remainder is the subscript of the innermost array dimension: [5i].
+  ///
+  /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
+  /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
+  /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
+  /// The Remainder is the subscript of the next array dimension: [2i].
+  ///
+  /// The subscript of the outermost dimension is the Quotient: [j+k].
+  ///
+  /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
+  void delinearize(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
+                   SmallVectorImpl<const SCEV *> &Sizes,
+                   const SCEV *ElementSize);
+
+  /// Return the DataLayout associated with the module this SCEV instance is
+  /// operating on.
+  const DataLayout &getDataLayout() const {
+    return F.getParent()->getDataLayout();
+  }
+
+  const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS);
+
+  const SCEVPredicate *
+  getWrapPredicate(const SCEVAddRecExpr *AR,
+                   SCEVWrapPredicate::IncrementWrapFlags AddedFlags);
+
+  /// Rewrites the SCEV according to the Predicates in \p A.
+  const SCEV *rewriteUsingPredicate(const SCEV *S, const Loop *L,
+                                    SCEVUnionPredicate &A);
+  /// Tries to convert the \p S expression to an AddRec expression,
+  /// adding additional predicates to \p Preds as required.
+  const SCEVAddRecExpr *convertSCEVToAddRecWithPredicates(
+      const SCEV *S, const Loop *L,
+      SmallPtrSetImpl<const SCEVPredicate *> &Preds);
+
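The last two entry points are the hooks predicated scalar evolution is built on; a sketch of their intended use, assuming SE, S, and L are in scope (the surrounding transform code, including emission of the run-time checks, is omitted):

  SmallPtrSet<const SCEVPredicate *, 4> Preds;
  const SCEVAddRecExpr *AR =
      SE.convertSCEVToAddRecWithPredicates(S, L, Preds);
  // A non-null AR models S only on executions where every predicate in
  // Preds holds, so a transform must emit run-time checks for them (e.g.
  // via loop versioning) before relying on the AddRec form.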
 private:
   /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
   /// Value is deleted.
   class SCEVCallbackVH final : public CallbackVH {
     ScalarEvolution *SE;
+
     void deleted() override;
     void allUsesReplacedWith(Value *New) override;
@@ -460,44 +1030,37 @@ private:
   friend class SCEVUnknown;

   /// The function we are analyzing.
-  ///
   Function &F;

   /// Does the module have any calls to the llvm.experimental.guard intrinsic
   /// at all? If this is false, we avoid doing work that will only help if
   /// there are guards present in the IR.
-  ///
   bool HasGuards;

   /// The target library information for the target we are targeting.
-  ///
   TargetLibraryInfo &TLI;

   /// The tracker for @llvm.assume intrinsics in this function.
   AssumptionCache &AC;

   /// The dominator tree.
-  ///
   DominatorTree &DT;

   /// The loop information for the function we are currently analyzing.
-  ///
   LoopInfo &LI;

   /// This SCEV is used to represent unknown trip counts and things.
   std::unique_ptr<SCEVCouldNotCompute> CouldNotCompute;

-  /// The typedef for HasRecMap.
-  ///
-  typedef DenseMap<const SCEV *, bool> HasRecMapType;
+  /// The type for HasRecMap.
+  using HasRecMapType = DenseMap<const SCEV *, bool>;

   /// This is a cache to record whether a SCEV contains any scAddRecExpr.
   HasRecMapType HasRecMap;

-  /// The typedef for ExprValueMap.
-  ///
-  typedef std::pair<Value *, ConstantInt *> ValueOffsetPair;
-  typedef DenseMap<const SCEV *, SetVector<ValueOffsetPair>> ExprValueMapType;
+  /// The type for ExprValueMap.
+  using ValueOffsetPair = std::pair<Value *, ConstantInt *>;
+  using ExprValueMapType = DenseMap<const SCEV *, SetVector<ValueOffsetPair>>;

   /// ExprValueMap -- This map records the original values from which
   /// the SCEV expr is generated.
@@ -521,13 +1084,11 @@ private:
   /// to V - Offset.
   ExprValueMapType ExprValueMap;

-  /// The typedef for ValueExprMap.
-  ///
-  typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *>>
-      ValueExprMapType;
+  /// The type for ValueExprMap.
+  using ValueExprMapType =
+      DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *>>;

   /// This is a cache of the values we have analyzed so far.
-  ///
   ValueExprMapType ValueExprMap;

   /// Mark predicate values currently being processed by isImpliedCond.
@@ -535,15 +1096,18 @@ private:
   /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of
   /// conditions dominating the backedge of a loop.
-  bool WalkingBEDominatingConds;
+  bool WalkingBEDominatingConds = false;

   /// Set to true by isKnownPredicateViaSplitting when we're trying to prove a
   /// predicate by splitting it into a set of independent predicates.
-  bool ProvingSplitPredicate;
+  bool ProvingSplitPredicate = false;

   /// Memoized values for the GetMinTrailingZeros
   DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;

+  /// Return the Value set from which the SCEV expr is generated.
+  SetVector<ValueOffsetPair> *getSCEVValues(const SCEV *S);
+
   /// Private helper method for the GetMinTrailingZeros method
   uint32_t GetMinTrailingZerosImpl(const SCEV *S);

@@ -554,7 +1118,9 @@ private:
   struct ExitLimit {
     const SCEV *ExactNotTaken; // The exit is not taken exactly this many times
     const SCEV *MaxNotTaken; // The exit is not taken at most this many times
-    bool MaxOrZero; // Not taken either exactly MaxNotTaken or zero times
+
+    // Not taken either exactly MaxNotTaken or zero times
+    bool MaxOrZero = false;

     /// A set of predicate guards for this ExitLimit. The result is only valid
     /// if all of the predicates in \c Predicates evaluate to 'true' at
@@ -584,6 +1150,8 @@ private:
       !isa<SCEVCouldNotCompute>(MaxNotTaken);
     }

+    bool hasOperand(const SCEV *S) const;
+
     /// Test whether this ExitLimit contains all information.
     bool hasFullInfo() const {
       return !isa<SCEVCouldNotCompute>(ExactNotTaken);
@@ -596,15 +1164,16 @@ private:
     PoisoningVH<BasicBlock> ExitingBlock;
     const SCEV *ExactNotTaken;
     std::unique_ptr<SCEVUnionPredicate> Predicate;
-    bool hasAlwaysTruePredicate() const {
-      return !Predicate || Predicate->isAlwaysTrue();
-    }

     explicit ExitNotTakenInfo(PoisoningVH<BasicBlock> ExitingBlock,
                               const SCEV *ExactNotTaken,
                               std::unique_ptr<SCEVUnionPredicate> Predicate)
         : ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken),
           Predicate(std::move(Predicate)) {}
+
+    bool hasAlwaysTruePredicate() const {
+      return !Predicate || Predicate->isAlwaysTrue();
+    }
   };

   /// Information about the backedge-taken count of a loop. This currently
@@ -625,7 +1194,7 @@ private:
     PointerIntPair<const SCEV *, 1> MaxAndComplete;

     /// True iff the backedge is taken either exactly Max or zero times.
-    bool MaxOrZero;
+    bool MaxOrZero = false;

     /// \name Helper projection functions on \c MaxAndComplete.
/// @{ @@ -634,12 +1203,11 @@ private: /// @} public: - BackedgeTakenInfo() : MaxAndComplete(nullptr, 0), MaxOrZero(false) {} - + BackedgeTakenInfo() : MaxAndComplete(nullptr, 0) {} BackedgeTakenInfo(BackedgeTakenInfo &&) = default; BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default; - typedef std::pair<BasicBlock *, ExitLimit> EdgeExitInfo; + using EdgeExitInfo = std::pair<BasicBlock *, ExitLimit>; /// Initialize BackedgeTakenInfo from a list of exact exit counts. BackedgeTakenInfo(SmallVectorImpl<EdgeExitInfo> &&ExitCounts, bool Complete, @@ -826,7 +1394,6 @@ private: /// Implementation code for getSCEVAtScope; called at most once for each /// SCEV+Loop pair. - /// const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L); /// This looks up computed SCEV values for all instructions that depend on @@ -902,7 +1469,8 @@ private: const ExitLimit &EL); }; - typedef ExitLimitCache ExitLimitCacheTy; + using ExitLimitCacheTy = ExitLimitCache; + ExitLimit computeExitLimitFromCondCached(ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, @@ -1065,7 +1633,6 @@ private: /// Test if the given expression is known to satisfy the condition described /// by Pred and the known constant ranges of LHS and RHS. - /// bool isKnownPredicateViaConstantRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); @@ -1110,7 +1677,6 @@ private: /// equivalent to proving no signed (resp. unsigned) wrap in /// {`Start`,+,`Step`} if `ExtendOpTy` is `SCEVSignExtendExpr` /// (resp. `SCEVZeroExtendExpr`). - /// template <typename ExtendOpTy> bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, const Loop *L); @@ -1150,563 +1716,16 @@ private: /// add recurrence on the loop \p L. bool isAddRecNeverPoison(const Instruction *I, const Loop *L); -public: - ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, - DominatorTree &DT, LoopInfo &LI); - ~ScalarEvolution(); - ScalarEvolution(ScalarEvolution &&Arg); - - LLVMContext &getContext() const { return F.getContext(); } - - /// Test if values of the given type are analyzable within the SCEV - /// framework. This primarily includes integer types, and it can optionally - /// include pointer types if the ScalarEvolution class has access to - /// target-specific information. - bool isSCEVable(Type *Ty) const; - - /// Return the size in bits of the specified type, for which isSCEVable must - /// return true. - uint64_t getTypeSizeInBits(Type *Ty) const; - - /// Return a type with the same bitwidth as the given type and which - /// represents how SCEV will treat the given type, for which isSCEVable must - /// return true. For pointer types, this is the pointer-sized integer type. - Type *getEffectiveSCEVType(Type *Ty) const; - - // Returns a wider type among {Ty1, Ty2}. - Type *getWiderType(Type *Ty1, Type *Ty2) const; - - /// Return true if the SCEV is a scAddRecExpr or it contains - /// scAddRecExpr. The result will be cached in HasRecMap. - /// - bool containsAddRecurrence(const SCEV *S); - - /// Return the Value set from which the SCEV expr is generated. - SetVector<ValueOffsetPair> *getSCEVValues(const SCEV *S); - - /// Erase Value from ValueExprMap and ExprValueMap. - void eraseValueFromMap(Value *V); - - /// Return a SCEV expression for the full generality of the specified - /// expression. 
- const SCEV *getSCEV(Value *V); - - const SCEV *getConstant(ConstantInt *V); - const SCEV *getConstant(const APInt &Val); - const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false); - const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty); - const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); - const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); - const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); - const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, - unsigned Depth = 0); - const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, - unsigned Depth = 0) { - SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; - return getAddExpr(Ops, Flags, Depth); - } - const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, - unsigned Depth = 0) { - SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2}; - return getAddExpr(Ops, Flags, Depth); - } - const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, - unsigned Depth = 0); - const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, - unsigned Depth = 0) { - SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; - return getMulExpr(Ops, Flags, Depth); - } - const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, - unsigned Depth = 0) { - SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2}; - return getMulExpr(Ops, Flags, Depth); - } - const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); - const SCEV *getUDivExactExpr(const SCEV *LHS, const SCEV *RHS); - const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, - SCEV::NoWrapFlags Flags); - const SCEV *getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, - const Loop *L, SCEV::NoWrapFlags Flags); - const SCEV *getAddRecExpr(const SmallVectorImpl<const SCEV *> &Operands, - const Loop *L, SCEV::NoWrapFlags Flags) { - SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end()); - return getAddRecExpr(NewOp, L, Flags); - } - - /// Checks if \p SymbolicPHI can be rewritten as an AddRecExpr under some - /// Predicates. If successful return these <AddRecExpr, Predicates>; - /// The function is intended to be called from PSCEV (the caller will decide - /// whether to actually add the predicates and carry out the rewrites). - Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> - createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI); - - /// Returns an expression for a GEP - /// - /// \p GEP The GEP. The indices contained in the GEP itself are ignored, - /// instead we use IndexExprs. - /// \p IndexExprs The expressions for the indices. - const SCEV *getGEPExpr(GEPOperator *GEP, - const SmallVectorImpl<const SCEV *> &IndexExprs); - const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); - const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands); - const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); - const SCEV *getUMaxExpr(SmallVectorImpl<const SCEV *> &Operands); - const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS); - const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS); - const SCEV *getUnknown(Value *V); - const SCEV *getCouldNotCompute(); - - /// Return a SCEV for the constant 0 of a specific type. 
- const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); } - - /// Return a SCEV for the constant 1 of a specific type. - const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); } - - /// Return an expression for sizeof AllocTy that is type IntTy - /// - const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy); - - /// Return an expression for offsetof on the given field with type IntTy - /// - const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo); - - /// Return the SCEV object corresponding to -V. - /// - const SCEV *getNegativeSCEV(const SCEV *V, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); - - /// Return the SCEV object corresponding to ~V. - /// - const SCEV *getNotSCEV(const SCEV *V); - - /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1. - const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, - unsigned Depth = 0); - - /// Return a SCEV corresponding to a conversion of the input value to the - /// specified type. If the type must be extended, it is zero extended. - const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty); - - /// Return a SCEV corresponding to a conversion of the input value to the - /// specified type. If the type must be extended, it is sign extended. - const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty); - - /// Return a SCEV corresponding to a conversion of the input value to the - /// specified type. If the type must be extended, it is zero extended. The - /// conversion must not be narrowing. - const SCEV *getNoopOrZeroExtend(const SCEV *V, Type *Ty); - - /// Return a SCEV corresponding to a conversion of the input value to the - /// specified type. If the type must be extended, it is sign extended. The - /// conversion must not be narrowing. - const SCEV *getNoopOrSignExtend(const SCEV *V, Type *Ty); - - /// Return a SCEV corresponding to a conversion of the input value to the - /// specified type. If the type must be extended, it is extended with - /// unspecified bits. The conversion must not be narrowing. - const SCEV *getNoopOrAnyExtend(const SCEV *V, Type *Ty); - - /// Return a SCEV corresponding to a conversion of the input value to the - /// specified type. The conversion must not be widening. - const SCEV *getTruncateOrNoop(const SCEV *V, Type *Ty); - - /// Promote the operands to the wider of the types using zero-extension, and - /// then perform a umax operation with them. - const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); - - /// Promote the operands to the wider of the types using zero-extension, and - /// then perform a umin operation with them. - const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); - - /// Transitively follow the chain of pointer-type operands until reaching a - /// SCEV that does not have a single pointer operand. This returns a - /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner - /// cases do exist. - const SCEV *getPointerBase(const SCEV *V); - - /// Return a SCEV expression for the specified value at the specified scope - /// in the program. The L value specifies a loop nest to evaluate the - /// expression at, where null is the top-level or a specified loop is - /// immediately inside of the loop. - /// - /// This method can be used to compute the exit value for a variable defined - /// in a loop by querying what the value will hold in the parent loop. 
- /// - /// In the case that a relevant loop exit value cannot be computed, the - /// original value V is returned. - const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L); - - /// This is a convenience function which does getSCEVAtScope(getSCEV(V), L). - const SCEV *getSCEVAtScope(Value *V, const Loop *L); - - /// Test whether entry to the loop is protected by a conditional between LHS - /// and RHS. This is used to help avoid max expressions in loop trip - /// counts, and to eliminate casts. - bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS); - - /// Test whether the backedge of the loop is protected by a conditional - /// between LHS and RHS. This is used to to eliminate casts. - bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS); - - /// Returns the maximum trip count of the loop if it is a single-exit - /// loop and we can compute a small maximum for that loop. - /// - /// Implemented in terms of the \c getSmallConstantTripCount overload with - /// the single exiting block passed to it. See that routine for details. - unsigned getSmallConstantTripCount(const Loop *L); - - /// Returns the maximum trip count of this loop as a normal unsigned - /// value. Returns 0 if the trip count is unknown or not constant. This - /// "trip count" assumes that control exits via ExitingBlock. More - /// precisely, it is the number of times that control may reach ExitingBlock - /// before taking the branch. For loops with multiple exits, it may not be - /// the number times that the loop header executes if the loop exits - /// prematurely via another branch. - unsigned getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock); - - /// Returns the upper bound of the loop trip count as a normal unsigned - /// value. - /// Returns 0 if the trip count is unknown or not constant. - unsigned getSmallConstantMaxTripCount(const Loop *L); - - /// Returns the largest constant divisor of the trip count of the - /// loop if it is a single-exit loop and we can compute a small maximum for - /// that loop. - /// - /// Implemented in terms of the \c getSmallConstantTripMultiple overload with - /// the single exiting block passed to it. See that routine for details. - unsigned getSmallConstantTripMultiple(const Loop *L); - - /// Returns the largest constant divisor of the trip count of this loop as a - /// normal unsigned value, if possible. This means that the actual trip - /// count is always a multiple of the returned value (don't forget the trip - /// count could very well be zero as well!). As explained in the comments - /// for getSmallConstantTripCount, this assumes that control exits the loop - /// via ExitingBlock. - unsigned getSmallConstantTripMultiple(const Loop *L, - BasicBlock *ExitingBlock); - - /// Get the expression for the number of loop iterations for which this loop - /// is guaranteed not to exit via ExitingBlock. Otherwise return - /// SCEVCouldNotCompute. - const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock); - - /// If the specified loop has a predictable backedge-taken count, return it, - /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count is - /// the number of times the loop header will be branched to from within the - /// loop, assuming there are no abnormal exists like exception throws. 
This is - /// one less than the trip count of the loop, since it doesn't count the first - /// iteration, when the header is branched to from outside the loop. - /// - /// Note that it is not valid to call this method on a loop without a - /// loop-invariant backedge-taken count (see - /// hasLoopInvariantBackedgeTakenCount). - /// - const SCEV *getBackedgeTakenCount(const Loop *L); - - /// Similar to getBackedgeTakenCount, except it will add a set of - /// SCEV predicates to Predicates that are required to be true in order for - /// the answer to be correct. Predicates can be checked with run-time - /// checks and can be used to perform loop versioning. - const SCEV *getPredicatedBackedgeTakenCount(const Loop *L, - SCEVUnionPredicate &Predicates); - - /// When successful, this returns a SCEVConstant that is greater than or equal - /// to (i.e. a "conservative over-approximation") of the value returend by - /// getBackedgeTakenCount. If such a value cannot be computed, it returns the - /// SCEVCouldNotCompute object. - const SCEV *getMaxBackedgeTakenCount(const Loop *L); - - /// Return true if the backedge taken count is either the value returned by - /// getMaxBackedgeTakenCount or zero. - bool isBackedgeTakenCountMaxOrZero(const Loop *L); - - /// Return true if the specified loop has an analyzable loop-invariant - /// backedge-taken count. - bool hasLoopInvariantBackedgeTakenCount(const Loop *L); - - /// This method should be called by the client when it has changed a loop in - /// a way that may effect ScalarEvolution's ability to compute a trip count, - /// or if the loop is deleted. This call is potentially expensive for large - /// loop bodies. - void forgetLoop(const Loop *L); - - /// This method should be called by the client when it has changed a value - /// in a way that may effect its value, or which may disconnect it from a - /// def-use chain linking it to a loop. - void forgetValue(Value *V); - - /// Called when the client has changed the disposition of values in - /// this loop. - /// - /// We don't have a way to invalidate per-loop dispositions. Clear and - /// recompute is simpler. - void forgetLoopDispositions(const Loop *L) { LoopDispositions.clear(); } - - /// Determine the minimum number of zero bits that S is guaranteed to end in - /// (at every loop iteration). It is, at the same time, the minimum number - /// of times S is divisible by 2. For example, given {4,+,8} it returns 2. - /// If S is guaranteed to be 0, it returns the bitwidth of S. - uint32_t GetMinTrailingZeros(const SCEV *S); - - /// Determine the unsigned range for a particular SCEV. - /// NOTE: This returns a copy of the reference returned by getRangeRef. - ConstantRange getUnsignedRange(const SCEV *S) { - return getRangeRef(S, HINT_RANGE_UNSIGNED); - } - - /// Determine the min of the unsigned range for a particular SCEV. - APInt getUnsignedRangeMin(const SCEV *S) { - return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMin(); - } - - /// Determine the max of the unsigned range for a particular SCEV. - APInt getUnsignedRangeMax(const SCEV *S) { - return getRangeRef(S, HINT_RANGE_UNSIGNED).getUnsignedMax(); - } - - /// Determine the signed range for a particular SCEV. - /// NOTE: This returns a copy of the reference returned by getRangeRef. - ConstantRange getSignedRange(const SCEV *S) { - return getRangeRef(S, HINT_RANGE_SIGNED); - } - - /// Determine the min of the signed range for a particular SCEV. 
- APInt getSignedRangeMin(const SCEV *S) { - return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMin(); - } - - /// Determine the max of the signed range for a particular SCEV. - APInt getSignedRangeMax(const SCEV *S) { - return getRangeRef(S, HINT_RANGE_SIGNED).getSignedMax(); - } - - /// Test if the given expression is known to be negative. - /// - bool isKnownNegative(const SCEV *S); - - /// Test if the given expression is known to be positive. - /// - bool isKnownPositive(const SCEV *S); - - /// Test if the given expression is known to be non-negative. - /// - bool isKnownNonNegative(const SCEV *S); - - /// Test if the given expression is known to be non-positive. - /// - bool isKnownNonPositive(const SCEV *S); - - /// Test if the given expression is known to be non-zero. - /// - bool isKnownNonZero(const SCEV *S); - - /// Test if the given expression is known to satisfy the condition described - /// by Pred, LHS, and RHS. - /// - bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS); - - /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X" - /// is monotonically increasing or decreasing. In the former case set - /// `Increasing` to true and in the latter case set `Increasing` to false. - /// - /// A predicate is said to be monotonically increasing if may go from being - /// false to being true as the loop iterates, but never the other way - /// around. A predicate is said to be monotonically decreasing if may go - /// from being true to being false as the loop iterates, but never the other - /// way around. - bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, - bool &Increasing); - - /// Return true if the result of the predicate LHS `Pred` RHS is loop - /// invariant with respect to L. Set InvariantPred, InvariantLHS and - /// InvariantLHS so that InvariantLHS `InvariantPred` InvariantRHS is the - /// loop invariant form of LHS `Pred` RHS. - bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS, const Loop *L, - ICmpInst::Predicate &InvariantPred, - const SCEV *&InvariantLHS, - const SCEV *&InvariantRHS); - - /// Simplify LHS and RHS in a comparison with predicate Pred. Return true - /// iff any changes were made. If the operands are provably equal or - /// unequal, LHS and RHS are set to the same value and Pred is set to either - /// ICMP_EQ or ICMP_NE. - /// - bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS, - const SCEV *&RHS, unsigned Depth = 0); - - /// Return the "disposition" of the given SCEV with respect to the given - /// loop. - LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L); - - /// Return true if the value of the given SCEV is unchanging in the - /// specified loop. - bool isLoopInvariant(const SCEV *S, const Loop *L); - - /// Determine if the SCEV can be evaluated at loop's entry. It is true if it - /// doesn't depend on a SCEVUnknown of an instruction which is dominated by - /// the header of loop L. - bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L); - - /// Return true if the given SCEV changes value in a known way in the - /// specified loop. This property being true implies that the value is - /// variant in the loop AND that we can emit an expression to compute the - /// value of the expression at any particular loop iteration. - bool hasComputableLoopEvolution(const SCEV *S, const Loop *L); - - /// Return the "disposition" of the given SCEV with respect to the given - /// block. 
- BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB); - - /// Return true if elements that makes up the given SCEV dominate the - /// specified basic block. - bool dominates(const SCEV *S, const BasicBlock *BB); - - /// Return true if elements that makes up the given SCEV properly dominate - /// the specified basic block. - bool properlyDominates(const SCEV *S, const BasicBlock *BB); - - /// Test whether the given SCEV has Op as a direct or indirect operand. - bool hasOperand(const SCEV *S, const SCEV *Op) const; - - /// Return the size of an element read or written by Inst. - const SCEV *getElementSize(Instruction *Inst); - - /// Compute the array dimensions Sizes from the set of Terms extracted from - /// the memory access function of this SCEVAddRecExpr (second step of - /// delinearization). - void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, - SmallVectorImpl<const SCEV *> &Sizes, - const SCEV *ElementSize); - - void print(raw_ostream &OS) const; - void verify() const; - bool invalidate(Function &F, const PreservedAnalyses &PA, - FunctionAnalysisManager::Invalidator &Inv); - - /// Collect parametric terms occurring in step expressions (first step of - /// delinearization). - void collectParametricTerms(const SCEV *Expr, - SmallVectorImpl<const SCEV *> &Terms); - - /// Return in Subscripts the access functions for each dimension in Sizes - /// (third step of delinearization). - void computeAccessFunctions(const SCEV *Expr, - SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes); - - /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the - /// subscripts and sizes of an array access. - /// - /// The delinearization is a 3 step process: the first two steps compute the - /// sizes of each subscript and the third step computes the access functions - /// for the delinearized array: - /// - /// 1. Find the terms in the step functions - /// 2. Compute the array size - /// 3. Compute the access function: divide the SCEV by the array size - /// starting with the innermost dimensions found in step 2. The Quotient - /// is the SCEV to be divided in the next step of the recursion. The - /// Remainder is the subscript of the innermost dimension. Loop over all - /// array dimensions computed in step 2. - /// - /// To compute a uniform array size for several memory accesses to the same - /// object, one can collect in step 1 all the step terms for all the memory - /// accesses, and compute in step 2 a unique array shape. This guarantees - /// that the array shape will be the same across all memory accesses. - /// - /// FIXME: We could derive the result of steps 1 and 2 from a description of - /// the array shape given in metadata. - /// - /// Example: - /// - /// A[][n][m] - /// - /// for i - /// for j - /// for k - /// A[j+k][2i][5i] = - /// - /// The initial SCEV: - /// - /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] - /// - /// 1. Find the different terms in the step functions: - /// -> [2*m, 5, n*m, n*m] - /// - /// 2. Compute the array size: sort and unique them - /// -> [n*m, 2*m, 5] - /// find the GCD of all the terms = 1 - /// divide by the GCD and erase constant terms - /// -> [n*m, 2*m] - /// GCD = m - /// divide by GCD -> [n, 2] - /// remove constant terms - /// -> [n] - /// size of the array is A[unknown][n][m] - /// - /// 3. Compute the access function - /// a. 
Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
-  /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
-  /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
-  /// The remainder is the subscript of the innermost array dimension: [5i].
-  ///
-  /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
-  /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
-  /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
-  /// The Remainder is the subscript of the next array dimension: [2i].
-  ///
-  /// The subscript of the outermost dimension is the Quotient: [j+k].
-  ///
-  /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
-  void delinearize(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
-                   SmallVectorImpl<const SCEV *> &Sizes,
-                   const SCEV *ElementSize);
-
-  /// Return the DataLayout associated with the module this SCEV instance is
-  /// operating on.
-  const DataLayout &getDataLayout() const {
-    return F.getParent()->getDataLayout();
-  }
-
-  const SCEVPredicate *getEqualPredicate(const SCEV *LHS, const SCEV *RHS);
-
-  const SCEVPredicate *
-  getWrapPredicate(const SCEVAddRecExpr *AR,
-                   SCEVWrapPredicate::IncrementWrapFlags AddedFlags);
-
-  /// Re-writes the SCEV according to the Predicates in \p A.
-  const SCEV *rewriteUsingPredicate(const SCEV *S, const Loop *L,
-                                    SCEVUnionPredicate &A);
-  /// Tries to convert the \p S expression to an AddRec expression,
-  /// adding additional predicates to \p Preds as required.
-  const SCEVAddRecExpr *convertSCEVToAddRecWithPredicates(
-      const SCEV *S, const Loop *L,
-      SmallPtrSetImpl<const SCEVPredicate *> &Preds);
-
-private:
-  /// Similar to createAddRecFromPHI, but with the additional flexibility of
+  /// Similar to createAddRecFromPHI, but with the additional flexibility of
   /// suggesting runtime overflow checks in case casts are encountered.
   /// If successful, the analysis records that for this loop, \p SymbolicPHI,
   /// which is the UnknownSCEV currently representing the PHI, can be rewritten
   /// into an AddRec, assuming some predicates; The function then returns the
   /// AddRec and the predicates as a pair, and caches this pair in
   /// PredicatedSCEVRewrites.
-  /// If the analysis is not successful, a mapping from the \p SymbolicPHI to
+  /// If the analysis is not successful, a mapping from the \p SymbolicPHI to
   /// itself (with no predicates) is recorded, and a nullptr with an empty
-  /// predicates vector is returned as a pair.
+  /// predicates vector is returned as a pair.
   Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
   createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI);

@@ -1715,6 +1734,16 @@ private:
   const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride,
                              bool Equality);

+  /// Compute the maximum backedge count based on the range of values
+  /// permitted by Start, End, and Stride. This is for loops of the form
+  /// {Start, +, Stride} LT End.
+  ///
+  /// Precondition: the induction variable is known to be positive. We *don't*
+  /// assert these preconditions so please be careful.
+  const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride,
+                                     const SCEV *End, unsigned BitWidth,
+                                     bool IsSigned);
+
   /// Verify if a linear IV with positive stride can overflow when in a
   /// less-than comparison, knowing the invariant term of the comparison,
   /// the stride and the knowledge of NSW/NUW flags on the recurrence.
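For the LT-loop case the underlying arithmetic is a ceiling division: with Delta = End - Start, the backedge is taken ceil(Delta / Stride) times. Whether computeBECount lowers it exactly this way is not visible from this header, so the following is only an illustrative sketch built from the public expression builders (ceilDiv is not part of the interface, and the rewrite is valid only when the addition cannot wrap):

  static const SCEV *ceilDiv(ScalarEvolution &SE, const SCEV *Delta,
                             const SCEV *Stride) {
    // ceil(Delta /u Stride) == (Delta + (Stride - 1)) /u Stride
    const SCEV *One = SE.getOne(Delta->getType());
    return SE.getUDivExpr(SE.getAddExpr(Delta, SE.getMinusSCEV(Stride, One)),
                          Stride);
  }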
@@ -1735,31 +1764,39 @@ private: const SCEV *getOrCreateMulExpr(SmallVectorImpl<const SCEV *> &Ops, SCEV::NoWrapFlags Flags); -private: + /// Find all of the loops transitively used in \p S, and update \c LoopUsers + /// accordingly. + void addToLoopUseLists(const SCEV *S); + FoldingSet<SCEV> UniqueSCEVs; FoldingSet<SCEVPredicate> UniquePreds; BumpPtrAllocator SCEVAllocator; + /// This maps loops to a list of SCEV expressions that (transitively) use said + /// loop. + DenseMap<const Loop *, SmallVector<const SCEV *, 4>> LoopUsers; + /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression /// they can be rewritten into under certain predicates. DenseMap<std::pair<const SCEVUnknown *, const Loop *>, std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> PredicatedSCEVRewrites; - + /// The head of a linked list of all SCEVUnknown values that have been /// allocated. This is used by releaseMemory to locate them all and call /// their destructors. - SCEVUnknown *FirstUnknown; + SCEVUnknown *FirstUnknown = nullptr; }; /// Analysis pass that exposes the \c ScalarEvolution for a function. class ScalarEvolutionAnalysis : public AnalysisInfoMixin<ScalarEvolutionAnalysis> { friend AnalysisInfoMixin<ScalarEvolutionAnalysis>; + static AnalysisKey Key; public: - typedef ScalarEvolution Result; + using Result = ScalarEvolution; ScalarEvolution run(Function &F, FunctionAnalysisManager &AM); }; @@ -1771,6 +1808,7 @@ class ScalarEvolutionPrinterPass public: explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; @@ -1808,6 +1846,7 @@ public: class PredicatedScalarEvolution { public: PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L); + const SCEVUnionPredicate &getUnionPredicate() const; /// Returns the SCEV expression of V, in the context of the current SCEV @@ -1845,6 +1884,11 @@ public: /// The printed text is indented by \p Depth. void print(raw_ostream &OS, unsigned Depth) const; + /// Check if \p AR1 and \p AR2 are equal, while taking into account + /// Equal predicates in Preds. + bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, + const SCEVAddRecExpr *AR2) const; + private: /// Increments the version number of the predicate. This needs to be called /// every time the SCEV predicate changes. @@ -1852,7 +1896,7 @@ private: /// Holds a SCEV and the version number of the SCEV predicate used to /// perform the rewrite of the expression. - typedef std::pair<unsigned, const SCEV *> RewriteEntry; + using RewriteEntry = std::pair<unsigned, const SCEV *>; /// Maps a SCEV to the rewrite result of that SCEV at a certain version /// number. If this number doesn't match the current Generation, we will @@ -1878,11 +1922,12 @@ private: /// expression we mark it with the version of the predicate. We use this to /// figure out if the predicate has changed from the last rewrite of the /// SCEV. If so, we need to perform a new rewrite. - unsigned Generation; + unsigned Generation = 0; /// The backedge taken count. 
-  const SCEV *BackedgeCount;
+  const SCEV *BackedgeCount = nullptr;
 };

-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_SCALAREVOLUTION_H
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 7d16f34e54cb..4578e0da8ab2 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -27,9 +27,16 @@ namespace llvm {
   class TargetTransformInfo;

   /// Return true if the given expression is safe to expand in the sense that
-  /// all materialized values are safe to speculate.
+  /// all materialized values are safe to speculate anywhere their operands are
+  /// defined.
   bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE);

+  /// Return true if the given expression is safe to expand in the sense that
+  /// all materialized values are defined and safe to speculate at the specified
+  /// location and their operands are defined at this location.
+  bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
+                        ScalarEvolution &SE);
+
   /// This class uses information about analyzed scalars to rewrite expressions
   /// in canonical form.
   ///
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 56ddb5028d6d..acf83455cdcd 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -14,15 +14,27 @@
 #ifndef LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H
 #define LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H

+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstddef>

 namespace llvm {
-  class ConstantInt;
-  class ConstantRange;
-  class DominatorTree;
+
+class APInt;
+class Constant;
+class ConstantRange;
+class Loop;
+class Type;

   enum SCEVTypes {
     // These should be ordered in terms of increasing complexity to make the
@@ -37,8 +49,10 @@ namespace llvm {
     friend class ScalarEvolution;

     ConstantInt *V;
+
     SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) :
       SCEV(ID, scConstant), V(v) {}
+
   public:
     ConstantInt *getValue() const { return V; }
     const APInt &getAPInt() const { return getValue()->getValue(); }
@@ -117,7 +131,6 @@
     }
   };

-
   /// This node is a base class providing common functionality for
   /// n'ary operators.
   class SCEVNAryExpr : public SCEV {
@@ -135,13 +148,15 @@
   public:
     size_t getNumOperands() const { return NumOperands; }
+
     const SCEV *getOperand(unsigned i) const {
       assert(i < NumOperands && "Operand index out of range!");
       return Operands[i];
     }

-    typedef const SCEV *const *op_iterator;
-    typedef iterator_range<op_iterator> op_range;
+    using op_iterator = const SCEV *const *;
+    using op_range = iterator_range<op_iterator>;
+
     op_iterator op_begin() const { return Operands; }
     op_iterator op_end() const { return Operands + NumOperands; }
     op_range operands() const {
@@ -198,15 +213,13 @@
     }
   };

-
   /// This node represents an addition of some number of SCEVs.
class SCEVAddExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; SCEVAddExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N) - : SCEVCommutativeExpr(ID, scAddExpr, O, N) { - } + : SCEVCommutativeExpr(ID, scAddExpr, O, N) {} public: Type *getType() const { @@ -222,15 +235,13 @@ namespace llvm { } }; - /// This node represents multiplication of some number of SCEVs. class SCEVMulExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; SCEVMulExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N) - : SCEVCommutativeExpr(ID, scMulExpr, O, N) { - } + : SCEVCommutativeExpr(ID, scMulExpr, O, N) {} public: /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -239,13 +250,13 @@ namespace llvm { } }; - /// This class represents a binary unsigned division operation. class SCEVUDivExpr : public SCEV { friend class ScalarEvolution; const SCEV *LHS; const SCEV *RHS; + SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs) : SCEV(ID, scUDivExpr), LHS(lhs), RHS(rhs) {} @@ -268,7 +279,6 @@ namespace llvm { } }; - /// This node represents a polynomial recurrence on the trip count /// of the specified loop. This is the primary focus of the /// ScalarEvolution framework; all the other SCEV subclasses are @@ -368,7 +378,6 @@ namespace llvm { } }; - /// This class represents an unsigned maximum selection. class SCEVUMaxExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; @@ -393,10 +402,6 @@ namespace llvm { class SCEVUnknown final : public SCEV, private CallbackVH { friend class ScalarEvolution; - // Implement CallbackVH. - void deleted() override; - void allUsesReplacedWith(Value *New) override; - /// The parent ScalarEvolution value. This is used to update the /// parent's maps when the value associated with a SCEVUnknown is /// deleted or RAUW'd. @@ -410,6 +415,10 @@ namespace llvm { ScalarEvolution *se, SCEVUnknown *next) : SCEV(ID, scUnknown), CallbackVH(V), SE(se), Next(next) {} + // Implement CallbackVH. + void deleted() override; + void allUsesReplacedWith(Value *New) override; + public: Value *getValue() const { return getValPtr(); } @@ -490,6 +499,7 @@ namespace llvm { if (Visited.insert(S).second && Visitor.follow(S)) Worklist.push_back(S); } + public: SCEVTraversal(SV& V): Visitor(V) {} @@ -682,7 +692,7 @@ namespace llvm { } }; - typedef DenseMap<const Value*, Value*> ValueToValueMap; + using ValueToValueMap = DenseMap<const Value *, Value *>; /// The SCEVParameterRewriter takes a scalar evolution expression and updates /// the SCEVUnknown components following the Map (Value -> Value). @@ -714,26 +724,26 @@ namespace llvm { bool InterpretConsts; }; - typedef DenseMap<const Loop*, const SCEV*> LoopToScevMapT; + using LoopToScevMapT = DenseMap<const Loop *, const SCEV *>; /// The SCEVLoopAddRecRewriter takes a scalar evolution expression and applies /// the Map (Loop -> SCEV) to all AddRecExprs. 
class SCEVLoopAddRecRewriter : public SCEVRewriteVisitor<SCEVLoopAddRecRewriter> { public: + SCEVLoopAddRecRewriter(ScalarEvolution &SE, LoopToScevMapT &M) + : SCEVRewriteVisitor(SE), Map(M) {} + static const SCEV *rewrite(const SCEV *Scev, LoopToScevMapT &Map, ScalarEvolution &SE) { SCEVLoopAddRecRewriter Rewriter(SE, Map); return Rewriter.visit(Scev); } - SCEVLoopAddRecRewriter(ScalarEvolution &SE, LoopToScevMapT &M) - : SCEVRewriteVisitor(SE), Map(M) {} - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); + for (const SCEV *Op : Expr->operands()) + Operands.push_back(visit(Op)); const Loop *L = Expr->getLoop(); const SCEV *Res = SE.getAddRecExpr(Operands, L, Expr->getNoWrapFlags()); @@ -748,6 +758,7 @@ namespace llvm { private: LoopToScevMapT ⤅ }; -} -#endif +} // end namespace llvm + +#endif // LLVM_ANALYSIS_SCALAREVOLUTIONEXPRESSIONS_H diff --git a/include/llvm/Analysis/ScopedNoAliasAA.h b/include/llvm/Analysis/ScopedNoAliasAA.h index a7b57310d2d0..508968e16e5d 100644 --- a/include/llvm/Analysis/ScopedNoAliasAA.h +++ b/include/llvm/Analysis/ScopedNoAliasAA.h @@ -6,22 +6,27 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This is the interface for a metadata-based scoped no-alias analysis. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_SCOPEDNOALIASAA_H #define LLVM_ANALYSIS_SCOPEDNOALIASAA_H #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" +#include <memory> namespace llvm { +class Function; +class MDNode; +class MemoryLocation; + /// A simple AA result which uses scoped-noalias metadata to answer queries. class ScopedNoAliasAAResult : public AAResultBase<ScopedNoAliasAAResult> { friend AAResultBase<ScopedNoAliasAAResult>; @@ -46,10 +51,11 @@ private: /// Analysis pass providing a never-invalidated alias analysis result. class ScopedNoAliasAA : public AnalysisInfoMixin<ScopedNoAliasAA> { friend AnalysisInfoMixin<ScopedNoAliasAA>; + static AnalysisKey Key; public: - typedef ScopedNoAliasAAResult Result; + using Result = ScopedNoAliasAAResult; ScopedNoAliasAAResult run(Function &F, FunctionAnalysisManager &AM); }; @@ -77,6 +83,7 @@ public: // scoped noalias analysis. 
// ImmutablePass *createScopedNoAliasAAWrapperPass(); -} -#endif +} // end namespace llvm + +#endif // LLVM_ANALYSIS_SCOPEDNOALIASAA_H diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h index d1a54171d8bd..1b8df03b3a1b 100644 --- a/include/llvm/Analysis/SparsePropagation.h +++ b/include/llvm/Analysis/SparsePropagation.h @@ -15,37 +15,35 @@ #ifndef LLVM_ANALYSIS_SPARSEPROPAGATION_H #define LLVM_ANALYSIS_SPARSEPROPAGATION_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" #include <set> -#include <vector> + +#define DEBUG_TYPE "sparseprop" namespace llvm { -class Value; -class Constant; -class Argument; -class Instruction; -class PHINode; -class TerminatorInst; -class BasicBlock; -class Function; -class SparseSolver; -class raw_ostream; -template <typename T> class SmallVectorImpl; +/// A template for translating between LLVM Values and LatticeKeys. Clients must +/// provide a specialization of LatticeKeyInfo for their LatticeKey type. +template <class LatticeKey> struct LatticeKeyInfo { + // static inline Value *getValueFromLatticeKey(LatticeKey Key); + // static inline LatticeKey getLatticeKeyFromValue(Value *V); +}; -/// AbstractLatticeFunction - This class is implemented by the dataflow instance -/// to specify what the lattice values are and how they handle merges etc. -/// This gives the client the power to compute lattice values from instructions, -/// constants, etc. The requirement is that lattice values must all fit into -/// a void*. If a void* is not sufficient, the implementation should use this -/// pointer to be a pointer into a uniquing set or something. -/// -class AbstractLatticeFunction { -public: - typedef void *LatticeVal; +template <class LatticeKey, class LatticeVal, + class KeyInfo = LatticeKeyInfo<LatticeKey>> +class SparseSolver; +/// AbstractLatticeFunction - This class is implemented by the dataflow instance +/// to specify what the lattice values are and how they handle merges etc. This +/// gives the client the power to compute lattice values from instructions, +/// constants, etc. The current requirement is that lattice values must be +/// copyable. At the moment, nothing tries to avoid copying. Additionally, +/// lattice keys must be able to be used as keys of a mapping data structure. +/// Internally, the generic solver currently uses a DenseMap to map lattice keys +/// to lattice values. If the lattice key is a non-standard type, a +/// specialization of DenseMapInfo must be provided. +template <class LatticeKey, class LatticeVal> class AbstractLatticeFunction { private: LatticeVal UndefVal, OverdefinedVal, UntrackedVal; @@ -56,40 +54,28 @@ public: OverdefinedVal = overdefinedVal; UntrackedVal = untrackedVal; } - virtual ~AbstractLatticeFunction(); + + virtual ~AbstractLatticeFunction() = default; LatticeVal getUndefVal() const { return UndefVal; } LatticeVal getOverdefinedVal() const { return OverdefinedVal; } LatticeVal getUntrackedVal() const { return UntrackedVal; } - /// IsUntrackedValue - If the specified Value is something that is obviously - /// uninteresting to the analysis (and would always return UntrackedVal), - /// this function can return true to avoid pointless work. 
- virtual bool IsUntrackedValue(Value *V) { return false; } + /// IsUntrackedValue - If the specified LatticeKey is obviously uninteresting + /// to the analysis (i.e., it would always return UntrackedVal), this + /// function can return true to avoid pointless work. + virtual bool IsUntrackedValue(LatticeKey Key) { return false; } - /// ComputeConstant - Given a constant value, compute and return a lattice - /// value corresponding to the specified constant. - virtual LatticeVal ComputeConstant(Constant *C) { - return getOverdefinedVal(); // always safe + /// ComputeLatticeVal - Compute and return a LatticeVal corresponding to the + /// given LatticeKey. + virtual LatticeVal ComputeLatticeVal(LatticeKey Key) { + return getOverdefinedVal(); } /// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is /// one that we want to handle through ComputeInstructionState. virtual bool IsSpecialCasedPHI(PHINode *PN) { return false; } - /// GetConstant - If the specified lattice value is representable as an LLVM - /// constant value, return it. Otherwise return null. The returned value - /// must be in the same LLVM type as Val. - virtual Constant *GetConstant(LatticeVal LV, Value *Val, SparseSolver &SS) { - return nullptr; - } - - /// ComputeArgument - Given a formal argument value, compute and return a - /// lattice value corresponding to the specified argument. - virtual LatticeVal ComputeArgument(Argument *I) { - return getOverdefinedVal(); // always safe - } - /// MergeValues - Compute and return the merge of the two specified lattice /// values. Merging should only move one direction down the lattice to /// guarantee convergence (toward overdefined). @@ -97,67 +83,80 @@ public: return getOverdefinedVal(); // always safe, never useful. } - /// ComputeInstructionState - Given an instruction and a vector of its operand - /// values, compute the result value of the instruction. - virtual LatticeVal ComputeInstructionState(Instruction &I, SparseSolver &SS) { - return getOverdefinedVal(); // always safe, never useful. + /// ComputeInstructionState - Compute the LatticeKeys that change as a result + /// of executing instruction \p I. Their associated LatticeVals are stored in + /// \p ChangedValues. + virtual void + ComputeInstructionState(Instruction &I, + DenseMap<LatticeKey, LatticeVal> &ChangedValues, + SparseSolver<LatticeKey, LatticeVal> &SS) = 0; + + /// PrintLatticeVal - Render the given LatticeVal to the specified stream. + virtual void PrintLatticeVal(LatticeVal LV, raw_ostream &OS); + + /// PrintLatticeKey - Render the given LatticeKey to the specified stream. + virtual void PrintLatticeKey(LatticeKey Key, raw_ostream &OS); + + /// GetValueFromLatticeVal - If the given LatticeVal is representable as an + /// LLVM value, return it; otherwise, return nullptr. If a type is given, the + /// returned value must have the same type. This function is used by the + /// generic solver in attempting to resolve branch and switch conditions. + virtual Value *GetValueFromLatticeVal(LatticeVal LV, Type *Ty = nullptr) { + return nullptr; + } - - /// PrintValue - Render the specified lattice value to the specified stream. - virtual void PrintValue(LatticeVal V, raw_ostream &OS); }; /// SparseSolver - This class is a general purpose solver for Sparse Conditional /// Propagation with a programmable lattice function.
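To make the interface concrete before the class itself: a client of these templates supplies a LatticeKey type that can serve as a DenseMap key, a copyable LatticeVal, a LatticeKeyInfo specialization for key/value translation, and at minimum an override of the pure-virtual ComputeInstructionState. The sketch below is illustrative only; every Test* name and the driver function are invented, and the base-class constructor argument order (undef, overdefined, untracked) is inferred from the snippet above.

// A minimal, hypothetical client of the generic sparse solver. Only the
// template interfaces come from this header; all Test* names are invented.
#include "llvm/Analysis/SparsePropagation.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// A toy lattice with the three required distinguished points plus an
// Untracked sentinel.
enum class TestLatticeVal { Undefined, Overdefined, Untracked };

namespace llvm {
// Keys are plain Value pointers, so translation is the identity and the
// default DenseMapInfo<Value *> already applies.
template <> struct LatticeKeyInfo<Value *> {
  static inline Value *getValueFromLatticeKey(Value *V) { return V; }
  static inline Value *getLatticeKeyFromValue(Value *V) { return V; }
};
} // end namespace llvm

class TestLatticeFunc
    : public AbstractLatticeFunction<Value *, TestLatticeVal> {
public:
  // Assumed constructor argument order: undef, overdefined, untracked.
  TestLatticeFunc()
      : AbstractLatticeFunction(TestLatticeVal::Undefined,
                                TestLatticeVal::Overdefined,
                                TestLatticeVal::Untracked) {}

  // The one mandatory override: conservatively send every instruction
  // result straight to overdefined.
  void ComputeInstructionState(
      Instruction &I, DenseMap<Value *, TestLatticeVal> &ChangedValues,
      SparseSolver<Value *, TestLatticeVal> &SS) override {
    ChangedValues[&I] = getOverdefinedVal();
  }
};

// Driving the solver: seed the entry block, then run to a fixed point. The
// solver in this revision no longer deletes its lattice function, so a
// stack-allocated one is fine.
void runToyAnalysis(Function &F) {
  TestLatticeFunc Lattice;
  SparseSolver<Value *, TestLatticeVal> Solver(&Lattice);
  Solver.MarkBlockExecutable(&F.getEntryBlock());
  Solver.Solve();
}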
-/// +template <class LatticeKey, class LatticeVal, class KeyInfo> class SparseSolver { - typedef AbstractLatticeFunction::LatticeVal LatticeVal; - /// LatticeFunc - This is the object that knows the lattice and how to do + /// LatticeFunc - This is the object that knows the lattice and how to /// compute transfer functions. - AbstractLatticeFunction *LatticeFunc; + AbstractLatticeFunction<LatticeKey, LatticeVal> *LatticeFunc; + + /// ValueState - Holds the LatticeVals associated with LatticeKeys. + DenseMap<LatticeKey, LatticeVal> ValueState; - DenseMap<Value *, LatticeVal> ValueState; // The state each value is in. - SmallPtrSet<BasicBlock *, 16> BBExecutable; // The bbs that are executable. + /// BBExecutable - Holds the basic blocks that are executable. + SmallPtrSet<BasicBlock *, 16> BBExecutable; - std::vector<Instruction *> InstWorkList; // Worklist of insts to process. + /// ValueWorkList - Holds values that should be processed. + SmallVector<Value *, 64> ValueWorkList; - std::vector<BasicBlock *> BBWorkList; // The BasicBlock work list + /// BBWorkList - Holds basic blocks that should be processed. + SmallVector<BasicBlock *, 64> BBWorkList; + + using Edge = std::pair<BasicBlock *, BasicBlock *>; /// KnownFeasibleEdges - Entries in this set are edges which have already had /// PHI nodes retriggered. - typedef std::pair<BasicBlock*,BasicBlock*> Edge; std::set<Edge> KnownFeasibleEdges; - SparseSolver(const SparseSolver&) = delete; - void operator=(const SparseSolver&) = delete; - public: - explicit SparseSolver(AbstractLatticeFunction *Lattice) + explicit SparseSolver( + AbstractLatticeFunction<LatticeKey, LatticeVal> *Lattice) : LatticeFunc(Lattice) {} - ~SparseSolver() { delete LatticeFunc; } + SparseSolver(const SparseSolver &) = delete; + SparseSolver &operator=(const SparseSolver &) = delete; /// Solve - Solve for constants and executable blocks. - /// - void Solve(Function &F); + void Solve(); - void Print(Function &F, raw_ostream &OS) const; + void Print(raw_ostream &OS) const; - /// getLatticeState - Return the LatticeVal object that corresponds to the - /// value. If an value is not in the map, it is returned as untracked, - /// unlike the getOrInitValueState method. - LatticeVal getLatticeState(Value *V) const { - DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V); + /// getExistingValueState - Return the LatticeVal object corresponding to the + /// given value from the ValueState map. If the value is not in the map, + /// UntrackedVal is returned, unlike the getValueState method. + LatticeVal getExistingValueState(LatticeKey Key) const { + auto I = ValueState.find(Key); return I != ValueState.end() ? I->second : LatticeFunc->getUntrackedVal(); } - /// getOrInitValueState - Return the LatticeVal object that corresponds to the - /// value, initializing the value's state if it hasn't been entered into the - /// map yet. This function is necessary because not all values should start - /// out in the underdefined state... Arguments should be overdefined, and - /// constants should be marked as constants. - /// - LatticeVal getOrInitValueState(Value *V); + /// getValueState - Return the LatticeVal object corresponding to the given + /// value from the ValueState map. If the value is not in the map, its state + /// is initialized. + LatticeVal getValueState(LatticeKey Key); /// isEdgeFeasible - Return true if the control flow edge from the 'From' /// basic block to the 'To' basic block is currently feasible. 
If @@ -174,15 +173,16 @@ public: return BBExecutable.count(BB); } -private: - /// UpdateState - When the state for some instruction is potentially updated, - /// this function notices and adds I to the worklist if needed. - void UpdateState(Instruction &Inst, LatticeVal V); - /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. void MarkBlockExecutable(BasicBlock *BB); +private: + /// UpdateState - When the state of some LatticeKey is potentially updated to + /// the given LatticeVal, this function notices and adds the LLVM value + /// corresponding to the key to the work list, if needed. + void UpdateState(LatticeKey Key, LatticeVal LV); + /// markEdgeExecutable - Mark the edge from Source to Dest as feasible, adding /// Dest to the BB work list if it is not already executable. void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest); @@ -197,6 +197,334 @@ private: void visitTerminatorInst(TerminatorInst &TI); }; +//===----------------------------------------------------------------------===// +// AbstractLatticeFunction Implementation +//===----------------------------------------------------------------------===// + +template <class LatticeKey, class LatticeVal> +void AbstractLatticeFunction<LatticeKey, LatticeVal>::PrintLatticeVal( + LatticeVal V, raw_ostream &OS) { + if (V == UndefVal) + OS << "undefined"; + else if (V == OverdefinedVal) + OS << "overdefined"; + else if (V == UntrackedVal) + OS << "untracked"; + else + OS << "unknown lattice value"; +} + +template <class LatticeKey, class LatticeVal> +void AbstractLatticeFunction<LatticeKey, LatticeVal>::PrintLatticeKey( + LatticeKey Key, raw_ostream &OS) { + OS << "unknown lattice key"; +} + +//===----------------------------------------------------------------------===// +// SparseSolver Implementation +//===----------------------------------------------------------------------===// + +template <class LatticeKey, class LatticeVal, class KeyInfo> +LatticeVal +SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getValueState(LatticeKey Key) { + auto I = ValueState.find(Key); + if (I != ValueState.end()) + return I->second; // Common case, in the map + + if (LatticeFunc->IsUntrackedValue(Key)) + return LatticeFunc->getUntrackedVal(); + LatticeVal LV = LatticeFunc->ComputeLatticeVal(Key); + + // If this value is untracked, don't add it to the map. + if (LV == LatticeFunc->getUntrackedVal()) + return LV; + return ValueState[Key] = LV; +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::UpdateState(LatticeKey Key, + LatticeVal LV) { + auto I = ValueState.find(Key); + if (I != ValueState.end() && I->second == LV) + return; // No change. + + // Update the state of the given LatticeKey and add its corresponding LLVM + // value to the work list. + ValueState[Key] = LV; + if (Value *V = KeyInfo::getValueFromLatticeKey(Key)) + ValueWorkList.push_back(V); +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::MarkBlockExecutable( + BasicBlock *BB) { + if (!BBExecutable.insert(BB).second) + return; + DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); + BBWorkList.push_back(BB); // Add the block to the work list!
+} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::markEdgeExecutable( + BasicBlock *Source, BasicBlock *Dest) { + if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) + return; // This edge is already known to be executable! + + DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() << " -> " + << Dest->getName() << "\n"); + + if (BBExecutable.count(Dest)) { + // The destination is already executable, but we just made an edge + // feasible that wasn't before. Revisit the PHI nodes in the block + // because they have potentially new operands. + for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I) + visitPHINode(*cast<PHINode>(I)); + } else { + MarkBlockExecutable(Dest); + } +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::getFeasibleSuccessors( + TerminatorInst &TI, SmallVectorImpl<bool> &Succs, bool AggressiveUndef) { + Succs.resize(TI.getNumSuccessors()); + if (TI.getNumSuccessors() == 0) + return; + + if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) { + if (BI->isUnconditional()) { + Succs[0] = true; + return; + } + + LatticeVal BCValue; + if (AggressiveUndef) + BCValue = + getValueState(KeyInfo::getLatticeKeyFromValue(BI->getCondition())); + else + BCValue = getExistingValueState( + KeyInfo::getLatticeKeyFromValue(BI->getCondition())); + + if (BCValue == LatticeFunc->getOverdefinedVal() || + BCValue == LatticeFunc->getUntrackedVal()) { + // Overdefined condition variables can branch either way. + Succs[0] = Succs[1] = true; + return; + } + + // If undefined, neither is feasible yet. + if (BCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = + dyn_cast_or_null<Constant>(LatticeFunc->GetValueFromLatticeVal( + BCValue, BI->getCondition()->getType())); + if (!C || !isa<ConstantInt>(C)) { + // Non-constant values can go either way. + Succs[0] = Succs[1] = true; + return; + } + + // Constant condition variables mean the branch can only go a single way + Succs[C->isNullValue()] = true; + return; + } + + if (TI.isExceptional()) { + Succs.assign(Succs.size(), true); + return; + } + + if (isa<IndirectBrInst>(TI)) { + Succs.assign(Succs.size(), true); + return; + } + + SwitchInst &SI = cast<SwitchInst>(TI); + LatticeVal SCValue; + if (AggressiveUndef) + SCValue = getValueState(KeyInfo::getLatticeKeyFromValue(SI.getCondition())); + else + SCValue = getExistingValueState( + KeyInfo::getLatticeKeyFromValue(SI.getCondition())); + + if (SCValue == LatticeFunc->getOverdefinedVal() || + SCValue == LatticeFunc->getUntrackedVal()) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + // If undefined, neither is feasible yet. + if (SCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = dyn_cast_or_null<Constant>(LatticeFunc->GetValueFromLatticeVal( + SCValue, SI.getCondition()->getType())); + if (!C || !isa<ConstantInt>(C)) { + // All destinations are executable! 
+ Succs.assign(TI.getNumSuccessors(), true); + return; + } + SwitchInst::CaseHandle Case = *SI.findCaseValue(cast<ConstantInt>(C)); + Succs[Case.getSuccessorIndex()] = true; +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +bool SparseSolver<LatticeKey, LatticeVal, KeyInfo>::isEdgeFeasible( + BasicBlock *From, BasicBlock *To, bool AggressiveUndef) { + SmallVector<bool, 16> SuccFeasible; + TerminatorInst *TI = From->getTerminator(); + getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (TI->getSuccessor(i) == To && SuccFeasible[i]) + return true; + + return false; +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitTerminatorInst( + TerminatorInst &TI) { + SmallVector<bool, 16> SuccFeasible; + getFeasibleSuccessors(TI, SuccFeasible, true); + + BasicBlock *BB = TI.getParent(); + + // Mark all feasible successors executable... + for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) + if (SuccFeasible[i]) + markEdgeExecutable(BB, TI.getSuccessor(i)); +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitPHINode(PHINode &PN) { + // The lattice function may store more information on a PHINode than could be + // computed from its incoming values. For example, SSI form stores its sigma + // functions as PHINodes with a single incoming value. + if (LatticeFunc->IsSpecialCasedPHI(&PN)) { + DenseMap<LatticeKey, LatticeVal> ChangedValues; + LatticeFunc->ComputeInstructionState(PN, ChangedValues, *this); + for (auto &ChangedValue : ChangedValues) + if (ChangedValue.second != LatticeFunc->getUntrackedVal()) + UpdateState(ChangedValue.first, ChangedValue.second); + return; + } + + LatticeKey Key = KeyInfo::getLatticeKeyFromValue(&PN); + LatticeVal PNIV = getValueState(Key); + LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); + + // If this value is already overdefined (common), just return. + if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal()) + return; // Quick exit + + // Super-extra-high-degree PHI nodes are unlikely to ever be interesting, + // and slow us down a lot. Just mark them overdefined. + if (PN.getNumIncomingValues() > 64) { + UpdateState(Key, Overdefined); + return; + } + + // Look at all of the executable operands of the PHI node. If any of them + // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the + // transfer function to give us the merge of the incoming values. + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // If the edge is not yet known to be feasible, it doesn't impact the PHI. + if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true)) + continue; + + // Merge in this value. + LatticeVal OpVal = + getValueState(KeyInfo::getLatticeKeyFromValue(PN.getIncomingValue(i))); + if (OpVal != PNIV) + PNIV = LatticeFunc->MergeValues(PNIV, OpVal); + + if (PNIV == Overdefined) + break; // Rest of input values don't matter. + } + + // Update the PHI with the computed value, which is the merge of the inputs. + UpdateState(Key, PNIV); +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::visitInst(Instruction &I) { + // PHIs are handled by the propagation logic; they are never passed into the + // transfer functions.
+ if (PHINode *PN = dyn_cast<PHINode>(&I)) + return visitPHINode(*PN); + + // Otherwise, ask the transfer function what the result is. If this is + // something that we care about, remember it. + DenseMap<LatticeKey, LatticeVal> ChangedValues; + LatticeFunc->ComputeInstructionState(I, ChangedValues, *this); + for (auto &ChangedValue : ChangedValues) + if (ChangedValue.second != LatticeFunc->getUntrackedVal()) + UpdateState(ChangedValue.first, ChangedValue.second); + + if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I)) + visitTerminatorInst(*TI); +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::Solve() { + // Process the work lists until they are empty! + while (!BBWorkList.empty() || !ValueWorkList.empty()) { + // Process the value work list. + while (!ValueWorkList.empty()) { + Value *V = ValueWorkList.back(); + ValueWorkList.pop_back(); + + DEBUG(dbgs() << "\nPopped off V-WL: " << *V << "\n"); + + // "V" got into the work list because it made a transition. See if any + // users are both live and in need of updating. + for (User *U : V->users()) + if (Instruction *Inst = dyn_cast<Instruction>(U)) + if (BBExecutable.count(Inst->getParent())) // Inst is executable? + visitInst(*Inst); + } + + // Process the basic block work list. + while (!BBWorkList.empty()) { + BasicBlock *BB = BBWorkList.back(); + BBWorkList.pop_back(); + + DEBUG(dbgs() << "\nPopped off BBWL: " << *BB); + + // Notify all instructions in this basic block that they are newly + // executable. + for (Instruction &I : *BB) + visitInst(I); + } + } +} + +template <class LatticeKey, class LatticeVal, class KeyInfo> +void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::Print( + raw_ostream &OS) const { + if (ValueState.empty()) + return; + + LatticeKey Key; + LatticeVal LV; + + OS << "ValueState:\n"; + for (auto &Entry : ValueState) { + std::tie(Key, LV) = Entry; + if (LV == LatticeFunc->getUntrackedVal()) + continue; + OS << "\t"; + LatticeFunc->PrintLatticeVal(LV, OS); + OS << ": "; + LatticeFunc->PrintLatticeKey(Key, OS); + OS << "\n"; + } +} } // end namespace llvm +#undef DEBUG_TYPE + #endif // LLVM_ANALYSIS_SPARSEPROPAGATION_H diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index 9cbe917c146d..a461ed813b9b 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -457,6 +457,15 @@ TLI_DEFINE_STRING_INTERNAL("bcopy") /// void bzero(void *s, size_t n); TLI_DEFINE_ENUM_INTERNAL(bzero) TLI_DEFINE_STRING_INTERNAL("bzero") +/// double cabs(double complex z) +TLI_DEFINE_ENUM_INTERNAL(cabs) +TLI_DEFINE_STRING_INTERNAL("cabs") +/// float cabs(float complex z) +TLI_DEFINE_ENUM_INTERNAL(cabsf) +TLI_DEFINE_STRING_INTERNAL("cabsf") +/// long double cabs(long double complex z) +TLI_DEFINE_ENUM_INTERNAL(cabsl) +TLI_DEFINE_STRING_INTERNAL("cabsl") /// void *calloc(size_t count, size_t size); TLI_DEFINE_ENUM_INTERNAL(calloc) TLI_DEFINE_STRING_INTERNAL("calloc") diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index d75e7833279b..a3fe834022f7 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -193,13 +193,9 @@ public: ShouldSignExtI32Param = Val; } - /// Returns the size of the wchar_t type in bytes. + /// Returns the size of the wchar_t type in bytes or 0 if the size is unknown. + /// This queries the 'wchar_size' metadata. 
unsigned getWCharSize(const Module &M) const; - - /// Returns size of the default wchar_t type on target \p T. This is mostly - /// intended to verify that the size in the frontend matches LLVM. All other - /// queries should use getWCharSize() instead. - static unsigned getTargetWCharSize(const Triple &T); }; /// Provides information about what library functions are available for diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 24edd3826a2e..c20f20cfbe4d 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -23,21 +23,28 @@ #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H #include "llvm/ADT/Optional.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" +#include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/DataTypes.h" #include <functional> namespace llvm { +namespace Intrinsic { +enum ID : unsigned; +} + class Function; class GlobalValue; +class IntrinsicInst; +class LoadInst; class Loop; -class ScalarEvolution; class SCEV; +class ScalarEvolution; +class StoreInst; +class SwitchInst; class Type; class User; class Value; @@ -107,6 +114,37 @@ public: /// \name Generic Target Information /// @{ + /// \brief The kind of cost model. + /// + /// There are several different cost models that can be customized by the + /// target. The normalization of each cost model may be target specific. + enum TargetCostKind { + TCK_RecipThroughput, ///< Reciprocal throughput. + TCK_Latency, ///< The latency of the instruction. + TCK_CodeSize ///< Instruction code size. + }; + + /// \brief Query the cost of a specified instruction. + /// + /// Clients should use this interface to query the cost of an existing + /// instruction. The instruction must have a valid parent (basic block). + /// + /// Note that this method does not cache the cost calculation, and it + /// can be expensive in some cases. + int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const { + switch (kind) { + case TCK_RecipThroughput: + return getInstructionThroughput(I); + + case TCK_Latency: + return getInstructionLatency(I); + + case TCK_CodeSize: + return getUserCost(I); + } + llvm_unreachable("Unknown instruction cost kind"); + } + /// \brief Underlying constants for 'cost' values in this interface. /// /// Many APIs in this interface return a cost. This enum defines the @@ -382,6 +420,8 @@ public: bool UpperBound; /// Allow peeling off loop iterations for loops with low dynamic tripcount. bool AllowPeeling; + /// Allow unrolling of all the iterations of the runtime loop remainder. + bool UnrollRemainder; }; /// \brief Get target-customized preferences for the generic loop unrolling @@ -420,10 +460,12 @@ public: /// this target, for a load/store of the specified type. /// The type may be VoidTy, in which case only return true if the addressing /// mode is legal for a load/store of any legal type. + /// If the target returns true from LSRWithInstrQueries(), I may be valid. /// TODO: Handle pre/postinc as well. bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, - unsigned AddrSpace = 0) const; + unsigned AddrSpace = 0, + Instruction *I = nullptr) const; /// \brief Return true if LSR cost of C1 is lower than C2.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, @@ -440,6 +482,20 @@ public: bool isLegalMaskedScatter(Type *DataType) const; bool isLegalMaskedGather(Type *DataType) const; + /// Return true if the target has a unified operation to calculate division + /// and remainder. If so, the additional implicit multiplication and + /// subtraction required to calculate a remainder from division are free. This + /// can enable more aggressive transformations for division and remainder than + /// would typically be allowed using throughput or size cost models. + bool hasDivRemOp(Type *DataType, bool IsSigned) const; + + /// Return true if the given instruction (assumed to be a memory access + /// instruction) has a volatile variant. If that's the case then we can avoid + /// addrspacecast to generic AS for volatile loads/stores. Default + /// implementation returns false, which prevents address space inference for + /// volatile loads/stores. + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; + /// Return true if target doesn't mind addresses in vectors. bool prefersVectorizedAddressing() const; @@ -453,12 +509,12 @@ public: bool HasBaseReg, int64_t Scale, unsigned AddrSpace = 0) const; - /// \brief Return true if target supports the load / store - /// instruction with the given Offset on the form reg + Offset. It - /// may be that Offset is too big for a certain type (register - /// class). - bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const; - + /// \brief Return true if the loop strength reduce pass should make + /// Instruction* based TTI queries to isLegalAddressingMode(). This is + /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned + /// immediate offset and no index register. + bool LSRWithInstrQueries() const; + /// \brief Return true if it's free to truncate a value of type Ty1 to type /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 /// by referencing its sub-register AX. @@ -498,8 +554,13 @@ public: /// \brief Don't restrict interleaved unrolling to small loops. bool enableAggressiveInterleaving(bool LoopHasReductions) const; - /// \brief Enable inline expansion of memcmp - bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const; + /// \brief If not nullptr, enable inline expansion of memcmp. IsZeroCmp is + /// true if this is the expansion of memcmp(p1, p2, s) == 0. + struct MemCmpExpansionOptions { + // The list of available load sizes (in bytes), sorted in decreasing order. + SmallVector<unsigned, 8> LoadSizes; + }; + const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const; /// \brief Enable matching of interleaved access groups. bool enableInterleavedAccessVectorization() const; @@ -525,6 +586,12 @@ public: /// \brief Return true if the hardware has a fast square-root instruction. bool haveFastSqrt(Type *Ty) const; + /// Return true if it is faster to check if a floating-point value is NaN + /// (or not-NaN) versus a comparison against a constant FP zero value. + /// Targets should override this if materializing a 0.0 for comparison is + /// generally as cheap as checking for ordered/unordered. + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; + /// \brief Return the expected cost of supporting the floating point operation /// of the specified type. int getFPOpCost(Type *Ty) const; @@ -599,6 +666,22 @@ public: /// \return The size of a cache line in bytes. 
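The MemCmpExpansionOptions hook above replaces the old boolean expandMemCmp query: a target opts in by returning a persistent options object listing its legal load widths, or nullptr to decline. A hypothetical override is sketched below; the surrounding struct name is invented, and only MemCmpExpansionOptions and the hook's signature come from this header.

#include "llvm/Analysis/TargetTransformInfo.h"
using llvm::TargetTransformInfo;

struct MyTargetTTIImpl { // illustrative stand-in for a real TTI impl class
  const TargetTransformInfo::MemCmpExpansionOptions *
  enableMemCmpExpansion(bool IsZeroCmp) const {
    // The caller keeps the pointer, so hand out a long-lived object. Load
    // sizes are listed in decreasing order, as the interface requires.
    static const TargetTransformInfo::MemCmpExpansionOptions Options = [] {
      TargetTransformInfo::MemCmpExpansionOptions O;
      for (unsigned Size : {8u, 4u, 2u, 1u})
        O.LoadSizes.push_back(Size);
      return O;
    }();
    // A target might, for example, only expand equality comparisons.
    return IsZeroCmp ? &Options : nullptr;
  }
};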
unsigned getCacheLineSize() const; + /// The possible cache levels + enum class CacheLevel { + L1D, // The L1 data cache + L2D, // The L2 data cache + + // We currently do not model L3 caches, as their sizes differ widely between + // microarchitectures. Also, we currently do not have a use for L3 cache + // size modeling yet. + }; + + /// \return The size of the cache level in bytes, if available. + llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const; + + /// \return The associativity of the cache level, if available. + llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const; + /// \return How much before a load we should place the prefetch instruction. /// This is currently measured in number of instructions. unsigned getPrefetchDistance() const; @@ -699,11 +782,14 @@ public: /// /// Pairwise: /// (v0, v1, v2, v3) - /// ((v0+v1), (v2, v3), undef, undef) + /// ((v0+v1), (v2+v3), undef, undef) /// Split: /// (v0, v1, v2, v3) /// ((v0+v2), (v1+v3), undef, undef) - int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const; + int getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) const; + int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, + bool IsUnsigned) const; /// \returns The cost of Intrinsic instructions. Analyses the real arguments. /// Three cases are handled: 1. scalar instruction 2. vector instruction @@ -776,12 +862,6 @@ public: unsigned SrcAlign, unsigned DestAlign) const; - /// \returns True if we want to test the new memcpy lowering functionality in - /// Transform/Utils. - /// Temporary. Will be removed once we move to the new functionality and - /// remove the old. - bool useWideIRMemcpyLoopLowering() const; - /// \returns True if the two functions have compatible attributes for inlining /// purposes. bool areInlineCompatible(const Function *Caller, @@ -838,6 +918,14 @@ public: /// @} private: + /// \brief Estimate the latency of specified instruction. + /// Returns 1 as the default value. + int getInstructionLatency(const Instruction *I) const; + + /// \brief Returns the expected throughput cost of the instruction. + /// Returns -1 if the cost is unknown. + int getInstructionThroughput(const Instruction *I) const; + /// \brief The abstract base class used to type erase specific TTI /// implementations. 
class Concept; @@ -882,18 +970,21 @@ public: virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) = 0; + unsigned AddrSpace, + Instruction *I) = 0; virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) = 0; virtual bool isLegalMaskedStore(Type *DataType) = 0; virtual bool isLegalMaskedLoad(Type *DataType) = 0; virtual bool isLegalMaskedScatter(Type *DataType) = 0; virtual bool isLegalMaskedGather(Type *DataType) = 0; + virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; + virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; virtual bool prefersVectorizedAddressing() = 0; virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; - virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0; + virtual bool LSRWithInstrQueries() = 0; virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool isTypeLegal(Type *Ty) = 0; @@ -907,7 +998,8 @@ public: unsigned VF) = 0; virtual bool supportsEfficientVectorElementLoadStore() = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; - virtual bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) = 0; + virtual const MemCmpExpansionOptions *enableMemCmpExpansion( + bool IsZeroCmp) const = 0; virtual bool enableInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, @@ -917,6 +1009,7 @@ public: bool *Fast) = 0; virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; + virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; virtual int getFPOpCost(Type *Ty) = 0; virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) = 0; @@ -931,6 +1024,8 @@ public: virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; virtual unsigned getCacheLineSize() = 0; + virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0; + virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0; virtual unsigned getPrefetchDistance() = 0; virtual unsigned getMinPrefetchStride() = 0; virtual unsigned getMaxPrefetchIterationsAhead() = 0; @@ -965,8 +1060,10 @@ public: ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace) = 0; - virtual int getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) = 0; + virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) = 0; + virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy, + bool IsPairwiseForm, bool IsUnsigned) = 0; virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed) = 0; @@ -1009,6 +1106,7 @@ public: virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags) const = 0; virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; + virtual int getInstructionLatency(const Instruction *I) = 0; }; template <typename T> @@ -1085,9 +1183,10 @@ public: } bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) override { + unsigned AddrSpace, + Instruction *I) override { return 
Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale, AddrSpace); + Scale, AddrSpace, I); } bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) override { @@ -1105,6 +1204,12 @@ public: bool isLegalMaskedGather(Type *DataType) override { return Impl.isLegalMaskedGather(DataType); } + bool hasDivRemOp(Type *DataType, bool IsSigned) override { + return Impl.hasDivRemOp(DataType, IsSigned); + } + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { + return Impl.hasVolatileVariant(I, AddrSpace); + } bool prefersVectorizedAddressing() override { return Impl.prefersVectorizedAddressing(); } @@ -1114,8 +1219,8 @@ public: return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); } - bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override { - return Impl.isFoldableMemAccessOffset(I, Offset); + bool LSRWithInstrQueries() override { + return Impl.LSRWithInstrQueries(); } bool isTruncateFree(Type *Ty1, Type *Ty2) override { return Impl.isTruncateFree(Ty1, Ty2); @@ -1148,8 +1253,9 @@ public: bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); } - bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) override { - return Impl.expandMemCmp(I, MaxLoadSize); + const MemCmpExpansionOptions *enableMemCmpExpansion( + bool IsZeroCmp) const override { + return Impl.enableMemCmpExpansion(IsZeroCmp); } bool enableInterleavedAccessVectorization() override { return Impl.enableInterleavedAccessVectorization(); @@ -1168,6 +1274,10 @@ public: } bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { + return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); + } + int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, @@ -1202,6 +1312,12 @@ public: unsigned getCacheLineSize() override { return Impl.getCacheLineSize(); } + llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override { + return Impl.getCacheSize(Level); + } + llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override { + return Impl.getCacheAssociativity(Level); + } unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); } unsigned getMinPrefetchStride() override { return Impl.getMinPrefetchStride(); @@ -1267,10 +1383,14 @@ public: return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); } - int getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) override { - return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm); + int getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) override { + return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); } + int getMinMaxReductionCost(Type *Ty, Type *CondTy, + bool IsPairwiseForm, bool IsUnsigned) override { + return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned); + } int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed) override { return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF, @@ -1360,6 +1480,9 @@ public: bool shouldExpandReduction(const IntrinsicInst *II) const override { return Impl.shouldExpandReduction(II); } + int getInstructionLatency(const Instruction *I) override { + return Impl.getInstructionLatency(I); + } }; template <typename T> 
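Between these two headers, a brief illustration of how the new cost-kind interface is consumed: getInstructionCost dispatches TCK_RecipThroughput, TCK_Latency, and TCK_CodeSize to getInstructionThroughput, getInstructionLatency, and getUserCost, respectively. The driver below is a sketch; its name is invented, and the TargetTransformInfo would come from the usual analysis infrastructure.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Illustrative only: print all three cost kinds for each instruction. Every
// instruction visited here has a parent block, as getInstructionCost
// requires.
void dumpInstructionCosts(const Function &F, const TargetTransformInfo &TTI) {
  for (const BasicBlock &BB : F)
    for (const Instruction &I : BB)
      errs() << I << "\n  throughput: "
             << TTI.getInstructionCost(&I,
                                       TargetTransformInfo::TCK_RecipThroughput)
             << "  latency: "
             << TTI.getInstructionCost(&I, TargetTransformInfo::TCK_Latency)
             << "  code size: "
             << TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize)
             << "\n";
}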
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 9bbda718acab..4c37402278ef 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -152,6 +152,7 @@ public: case Intrinsic::annotation: case Intrinsic::assume: + case Intrinsic::sideeffect: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::invariant_start: @@ -188,6 +189,8 @@ public: } bool isLoweredToCall(const Function *F) { + assert(F && "A concrete function must be provided to this routine."); + // FIXME: These should almost certainly not be handled here, and instead // handled with the help of TLI or the target itself. This was largely // ported from existing analysis heuristics here so that such refactorings @@ -230,7 +233,7 @@ public: bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, - unsigned AddrSpace) { + unsigned AddrSpace, Instruction *I = nullptr) { // Guess that only reg and reg+reg addressing is allowed. This heuristic is // taken from the implementation of LSR. return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); @@ -251,6 +254,10 @@ public: bool isLegalMaskedGather(Type *DataType) { return false; } + bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } + + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } + bool prefersVectorizedAddressing() { return true; } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, @@ -262,7 +269,7 @@ public: return -1; } - bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) { return true; } + bool LSRWithInstrQueries() { return false; } bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; } @@ -288,7 +295,10 @@ public: bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } - bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) { return false; } + const TTI::MemCmpExpansionOptions *enableMemCmpExpansion( + bool IsZeroCmp) const { + return nullptr; + } bool enableInterleavedAccessVectorization() { return false; } @@ -306,6 +316,8 @@ public: bool haveFastSqrt(Type *Ty) { return false; } + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; } + unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, @@ -340,6 +352,29 @@ public: unsigned getCacheLineSize() { return 0; } + llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) { + switch (Level) { + case TargetTransformInfo::CacheLevel::L1D: + LLVM_FALLTHROUGH; + case TargetTransformInfo::CacheLevel::L2D: + return llvm::Optional<unsigned>(); + } + + llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); + } + + llvm::Optional<unsigned> getCacheAssociativity( + TargetTransformInfo::CacheLevel Level) { + switch (Level) { + case TargetTransformInfo::CacheLevel::L1D: + LLVM_FALLTHROUGH; + case TargetTransformInfo::CacheLevel::L2D: + return llvm::Optional<unsigned>(); + } + + llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); + } + unsigned getPrefetchDistance() { return 0; } unsigned getMinPrefetchStride() { return 1; } @@ -423,10 +458,12 @@ public: unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, const SCEV *) { - return 0; + return 0; } - unsigned getReductionCost(unsigned, Type *, bool) { return 1; } + unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; } + + 
unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; } unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; } @@ -587,7 +624,7 @@ protected: APInt StrideVal = Step->getAPInt(); if (StrideVal.getBitWidth() > 64) return false; - // FIXME: need to take absolute value for negtive stride case + // FIXME: Need to take absolute value for negative stride case. return StrideVal.getSExtValue() < MergeDistance; } }; @@ -647,11 +684,13 @@ public: BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); } bool HasBaseReg = (BaseGV == nullptr); - int64_t BaseOffset = 0; + + auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType()); + APInt BaseOffset(PtrSizeBits, 0); int64_t Scale = 0; auto GTI = gep_type_begin(PointeeType, Operands); - Type *TargetType; + Type *TargetType = nullptr; // Handle the case where the GEP instruction has a single operand, // the basis, therefore TargetType is a nullptr. @@ -673,9 +712,10 @@ public: BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); } else { int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); - if (ConstIdx) - BaseOffset += ConstIdx->getSExtValue() * ElementSize; - else { + if (ConstIdx) { + BaseOffset += + ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; + } else { // Needs scale register. if (Scale != 0) // No addressing mode takes two scale registers. @@ -688,9 +728,10 @@ public: // Assumes the address space is 0 when Ptr is nullptr. unsigned AS = (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); + if (static_cast<T *>(this)->isLegalAddressingMode( - TargetType, const_cast<GlobalValue *>(BaseGV), BaseOffset, - HasBaseReg, Scale, AS)) + TargetType, const_cast<GlobalValue *>(BaseGV), + BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS)) return TTI::TCC_Free; return TTI::TCC_Basic; } @@ -713,6 +754,11 @@ public: if (isa<PHINode>(U)) return TTI::TCC_Free; // Model all PHI nodes as free. + // Static alloca doesn't generate target instructions. + if (auto *A = dyn_cast<AllocaInst>(U)) + if (A->isStaticAlloca()) + return TTI::TCC_Free; + if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), @@ -746,6 +792,38 @@ public: Operator::getOpcode(U), U->getType(), U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr); } + + int getInstructionLatency(const Instruction *I) { + SmallVector<const Value *, 4> Operands(I->value_op_begin(), + I->value_op_end()); + if (getUserCost(I, Operands) == TTI::TCC_Free) + return 0; + + if (isa<LoadInst>(I)) + return 4; + + Type *DstTy = I->getType(); + + // Usually an intrinsic is a simple instruction. + // A real function call is much slower. + if (auto *CI = dyn_cast<CallInst>(I)) { + const Function *F = CI->getCalledFunction(); + if (!F || static_cast<T *>(this)->isLoweredToCall(F)) + return 40; + // Some intrinsics return a value and a flag, we use the value type + // to decide its latency. + if (StructType* StructTy = dyn_cast<StructType>(DstTy)) + DstTy = StructTy->getElementType(0); + // Fall through to simple instructions. 
+ } + + if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy)) + DstTy = VectorTy->getElementType(); + if (DstTy->isFloatingPointTy()) + return 3; + + return 1; + } }; } diff --git a/include/llvm/Analysis/Trace.h b/include/llvm/Analysis/Trace.h index bedd654c6521..b05d384ab1a3 100644 --- a/include/llvm/Analysis/Trace.h +++ b/include/llvm/Analysis/Trace.h @@ -22,39 +22,36 @@ #include <vector> namespace llvm { - class BasicBlock; - class Function; - class Module; - class raw_ostream; + +class BasicBlock; +class Function; +class Module; +class raw_ostream; class Trace { - typedef std::vector<BasicBlock *> BasicBlockListType; + using BasicBlockListType = std::vector<BasicBlock *>; + BasicBlockListType BasicBlocks; public: /// Trace ctor - Make a new trace from a vector of basic blocks, /// residing in the function which is the parent of the first /// basic block in the vector. - /// Trace(const std::vector<BasicBlock *> &vBB) : BasicBlocks (vBB) {} /// getEntryBasicBlock - Return the entry basic block (first block) /// of the trace. - /// BasicBlock *getEntryBasicBlock () const { return BasicBlocks[0]; } /// operator[]/getBlock - Return basic block N in the trace. - /// BasicBlock *operator[](unsigned i) const { return BasicBlocks[i]; } BasicBlock *getBlock(unsigned i) const { return BasicBlocks[i]; } /// getFunction - Return this trace's parent function. - /// Function *getFunction () const; /// getModule - Return the Module that contains this trace's parent /// function. - /// Module *getModule () const; /// getBlockIndex - Return the index of the specified basic block in the @@ -68,14 +65,12 @@ public: /// contains - Returns true if this trace contains the given basic /// block. - /// bool contains(const BasicBlock *X) const { return getBlockIndex(X) != -1; } /// Returns true if B1 occurs before B2 in the trace, or if it is the same /// block as B2. Both blocks must be in the trace. - /// bool dominates(const BasicBlock *B1, const BasicBlock *B2) const { int B1Idx = getBlockIndex(B1), B2Idx = getBlockIndex(B2); assert(B1Idx != -1 && B2Idx != -1 && "Block is not in the trace!"); @@ -83,10 +78,10 @@ public: } // BasicBlock iterators... - typedef BasicBlockListType::iterator iterator; - typedef BasicBlockListType::const_iterator const_iterator; - typedef std::reverse_iterator<const_iterator> const_reverse_iterator; - typedef std::reverse_iterator<iterator> reverse_iterator; + using iterator = BasicBlockListType::iterator; + using const_iterator = BasicBlockListType::const_iterator; + using reverse_iterator = std::reverse_iterator<iterator>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; iterator begin() { return BasicBlocks.begin(); } const_iterator begin() const { return BasicBlocks.begin(); } @@ -105,12 +100,10 @@ public: iterator erase(iterator q1, iterator q2) { return BasicBlocks.erase (q1, q2); } /// print - Write trace to output stream. - /// void print(raw_ostream &O) const; /// dump - Debugger convenience method; writes trace to standard error /// output stream. - /// void dump() const; }; diff --git a/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/include/llvm/Analysis/TypeBasedAliasAnalysis.h index fd726e6cd37f..7fcfdb3a817c 100644 --- a/include/llvm/Analysis/TypeBasedAliasAnalysis.h +++ b/include/llvm/Analysis/TypeBasedAliasAnalysis.h @@ -6,22 +6,28 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This is the interface for a metadata-based TBAA.
See the source file for /// details on the algorithm. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H #define LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Metadata.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" +#include <memory> namespace llvm { +class Function; +class MDNode; +class MemoryLocation; + /// A simple AA result that uses TBAA metadata to answer queries. class TypeBasedAAResult : public AAResultBase<TypeBasedAAResult> { friend AAResultBase<TypeBasedAAResult>; @@ -50,10 +56,11 @@ private: /// Analysis pass providing a never-invalidated alias analysis result. class TypeBasedAA : public AnalysisInfoMixin<TypeBasedAA> { friend AnalysisInfoMixin<TypeBasedAA>; + static AnalysisKey Key; public: - typedef TypeBasedAAResult Result; + using Result = TypeBasedAAResult; TypeBasedAAResult run(Function &F, FunctionAnalysisManager &AM); }; @@ -81,6 +88,7 @@ public: // type-based alias analysis. // ImmutablePass *createTypeBasedAAWrapperPass(); -} -#endif +} // end namespace llvm + +#endif // LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H diff --git a/include/llvm/Analysis/ValueLattice.h b/include/llvm/Analysis/ValueLattice.h new file mode 100644 index 000000000000..18a43aafa8ca --- /dev/null +++ b/include/llvm/Analysis/ValueLattice.h @@ -0,0 +1,250 @@ +//===- ValueLattice.h - Value constraint analysis ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_VALUELATTICE_H +#define LLVM_ANALYSIS_VALUELATTICE_H + +#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" +// +//===----------------------------------------------------------------------===// +// ValueLatticeElement +//===----------------------------------------------------------------------===// + +/// This class represents lattice values for constants. +/// +/// FIXME: This is basically just for bringup, this can be made a lot more rich +/// in the future. +/// + +namespace llvm { +class ValueLatticeElement { + enum ValueLatticeElementTy { + /// This Value has no known value yet. As a result, this implies the + /// producing instruction is dead. Caution: We use this as the starting + /// state in our local meet rules. In this usage, it's taken to mean + /// "nothing known yet". + undefined, + + /// This Value has a specific constant value. (For constant integers, + /// constantrange is used instead. Integer typed constantexprs can appear + /// as constant.) + constant, + + /// This Value is known to not have the specified value. (For constant + /// integers, constantrange is used instead. As above, integer typed + /// constantexprs can appear here.) + notconstant, + + /// The Value falls within this range. (Used only for integer typed values.) + constantrange, + + /// We can not precisely model the dynamic values this value might take. + overdefined + }; + + /// Val: This stores the current lattice value along with the Constant* for + /// the constant if this is a 'constant' or 'notconstant' value. 
+ ValueLatticeElementTy Tag; + Constant *Val; + ConstantRange Range; + +public: + ValueLatticeElement() : Tag(undefined), Val(nullptr), Range(1, true) {} + + static ValueLatticeElement get(Constant *C) { + ValueLatticeElement Res; + if (!isa<UndefValue>(C)) + Res.markConstant(C); + return Res; + } + static ValueLatticeElement getNot(Constant *C) { + ValueLatticeElement Res; + if (!isa<UndefValue>(C)) + Res.markNotConstant(C); + return Res; + } + static ValueLatticeElement getRange(ConstantRange CR) { + ValueLatticeElement Res; + Res.markConstantRange(std::move(CR)); + return Res; + } + static ValueLatticeElement getOverdefined() { + ValueLatticeElement Res; + Res.markOverdefined(); + return Res; + } + + bool isUndefined() const { return Tag == undefined; } + bool isConstant() const { return Tag == constant; } + bool isNotConstant() const { return Tag == notconstant; } + bool isConstantRange() const { return Tag == constantrange; } + bool isOverdefined() const { return Tag == overdefined; } + + Constant *getConstant() const { + assert(isConstant() && "Cannot get the constant of a non-constant!"); + return Val; + } + + Constant *getNotConstant() const { + assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); + return Val; + } + + const ConstantRange &getConstantRange() const { + assert(isConstantRange() && + "Cannot get the constant-range of a non-constant-range!"); + return Range; + } + + Optional<APInt> asConstantInteger() const { + if (isConstant() && isa<ConstantInt>(Val)) { + return cast<ConstantInt>(Val)->getValue(); + } else if (isConstantRange() && Range.isSingleElement()) { + return *Range.getSingleElement(); + } + return None; + } + +private: + void markOverdefined() { + if (isOverdefined()) + return; + Tag = overdefined; + } + + void markConstant(Constant *V) { + assert(V && "Marking constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + markConstantRange(ConstantRange(CI->getValue())); + return; + } + if (isa<UndefValue>(V)) + return; + + assert((!isConstant() || getConstant() == V) && + "Marking constant with different value"); + assert(isUndefined()); + Tag = constant; + Val = V; + } + + void markNotConstant(Constant *V) { + assert(V && "Marking constant with NULL"); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + markConstantRange(ConstantRange(CI->getValue() + 1, CI->getValue())); + return; + } + if (isa<UndefValue>(V)) + return; + + assert((!isConstant() || getConstant() != V) && + "Marking constant !constant with same value"); + assert((!isNotConstant() || getNotConstant() == V) && + "Marking !constant with different value"); + assert(isUndefined() || isConstant()); + Tag = notconstant; + Val = V; + } + + void markConstantRange(ConstantRange NewR) { + if (isConstantRange()) { + if (NewR.isEmptySet()) + markOverdefined(); + else { + Range = std::move(NewR); + } + return; + } + + assert(isUndefined()); + if (NewR.isEmptySet()) + markOverdefined(); + else { + Tag = constantrange; + Range = std::move(NewR); + } + } + +public: + /// Updates this object to approximate both this object and RHS. Returns + /// true if this object has been changed. 
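A concrete illustration of the merge behaviour that mergeIn (declared next) implements may help: integer constants are held as single-element constant ranges, merges only move down the lattice, and overdefined absorbs everything. The sketch below is standalone and illustrative; the function name is invented, and the DataLayout can come from any Module.

#include "llvm/Analysis/ValueLattice.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cassert>
using namespace llvm;

void mergeExamples(const DataLayout &DL, LLVMContext &Ctx) {
  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);

  // Integer constants are represented as single-element constant ranges.
  ValueLatticeElement A =
      ValueLatticeElement::get(ConstantInt::get(Int32Ty, 1)); // range [1, 2)
  ValueLatticeElement B =
      ValueLatticeElement::get(ConstantInt::get(Int32Ty, 4)); // range [4, 5)

  // Merging two ranges unions them: the result here is the range [1, 5).
  bool Changed = A.mergeIn(B, DL);
  assert(Changed && A.isConstantRange());
  (void)Changed;

  // Overdefined absorbs anything merged into it.
  A.mergeIn(ValueLatticeElement::getOverdefined(), DL);
  assert(A.isOverdefined());
}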
+ bool mergeIn(const ValueLatticeElement &RHS, const DataLayout &DL) { + if (RHS.isUndefined() || isOverdefined()) + return false; + if (RHS.isOverdefined()) { + markOverdefined(); + return true; + } + + if (isUndefined()) { + *this = RHS; + return !RHS.isUndefined(); + } + + if (isConstant()) { + if (RHS.isConstant() && Val == RHS.Val) + return false; + markOverdefined(); + return true; + } + + if (isNotConstant()) { + if (RHS.isNotConstant() && Val == RHS.Val) + return false; + markOverdefined(); + return true; + } + + assert(isConstantRange() && "New ValueLattice type?"); + if (!RHS.isConstantRange()) { + // We can get here if we've encountered a constantexpr of integer type + // and merge it with a constantrange. + markOverdefined(); + return true; + } + ConstantRange NewR = Range.unionWith(RHS.getConstantRange()); + if (NewR.isFullSet()) + markOverdefined(); + else + markConstantRange(std::move(NewR)); + return true; + } + + ConstantInt *getConstantInt() const { + assert(isConstant() && isa<ConstantInt>(getConstant()) && + "No integer constant"); + return cast<ConstantInt>(getConstant()); + } + + bool satisfiesPredicate(CmpInst::Predicate Pred, + const ValueLatticeElement &Other) const { + // TODO: share with LVI getPredicateResult. + + if (isUndefined() || Other.isUndefined()) + return true; + + if (isConstant() && Other.isConstant() && Pred == CmpInst::FCMP_OEQ) + return getConstant() == Other.getConstant(); + + // Integer constants are represented as ConstantRanges with single + // elements. + if (!isConstantRange() || !Other.isConstantRange()) + return false; + + const auto &CR = getConstantRange(); + const auto &OtherCR = Other.getConstantRange(); + return ConstantRange::makeSatisfyingICmpRegion(Pred, OtherCR).contains(CR); + } +}; + +raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val); + +} // end namespace llvm +#endif diff --git a/include/llvm/Analysis/ValueLatticeUtils.h b/include/llvm/Analysis/ValueLatticeUtils.h new file mode 100644 index 000000000000..02072672e56e --- /dev/null +++ b/include/llvm/Analysis/ValueLatticeUtils.h @@ -0,0 +1,41 @@ +//===-- ValueLatticeUtils.h - Utils for solving lattices --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares common functions useful for performing data-flow analyses +// that propagate values across function boundaries. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_VALUELATTICEUTILS_H +#define LLVM_ANALYSIS_VALUELATTICEUTILS_H + +namespace llvm { + +class Function; +class GlobalVariable; + +/// Determine if the values of the given function's arguments can be tracked +/// interprocedurally. The value of an argument can be tracked if the function +/// has local linkage and its address is not taken. +bool canTrackArgumentsInterprocedurally(Function *F); + +/// Determine if the values of the given function's returns can be tracked +/// interprocedurally. Return values can be tracked if the function has an +/// exact definition and it doesn't have the "naked" attribute. Naked functions +/// may contain assembly code that returns untrackable values. +bool canTrackReturnsInterprocedurally(Function *F); + +/// Determine if the value maintained in the given global variable can be +/// tracked interprocedurally. 
diff --git a/include/llvm/Analysis/ValueLatticeUtils.h b/include/llvm/Analysis/ValueLatticeUtils.h new file mode 100644 index 000000000000..02072672e56e --- /dev/null +++ b/include/llvm/Analysis/ValueLatticeUtils.h @@ -0,0 +1,41 @@
+//===-- ValueLatticeUtils.h - Utils for solving lattices --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares common functions useful for performing data-flow analyses
+// that propagate values across function boundaries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_VALUELATTICEUTILS_H
+#define LLVM_ANALYSIS_VALUELATTICEUTILS_H
+
+namespace llvm {
+
+class Function;
+class GlobalVariable;
+
+/// Determine if the values of the given function's arguments can be tracked
+/// interprocedurally. The value of an argument can be tracked if the function
+/// has local linkage and its address is not taken.
+bool canTrackArgumentsInterprocedurally(Function *F);
+
+/// Determine if the values of the given function's returns can be tracked
+/// interprocedurally. Return values can be tracked if the function has an
+/// exact definition and it doesn't have the "naked" attribute. Naked functions
+/// may contain assembly code that returns untrackable values.
+bool canTrackReturnsInterprocedurally(Function *F);
+
+/// Determine if the value maintained in the given global variable can be
+/// tracked interprocedurally. A value can be tracked if the global variable
+/// has local linkage and is only used by non-volatile loads and stores.
+bool canTrackGlobalVariableInterprocedurally(GlobalVariable *GV);
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_VALUELATTICEUTILS_H
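A brief sketch of how a solver in the IPSCCP mold might consult these predicates (illustrative only; shouldTrackFunction and shouldTrackGlobal are hypothetical helpers, not part of this patch). Anything that fails the check must be treated as overdefined, since unseen callers or writers could produce any value:

#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"

using namespace llvm;

// Arguments and returns are only trackable when every call site and every
// return is visible to the analysis.
static bool shouldTrackFunction(Function &F) {
  return canTrackArgumentsInterprocedurally(&F) ||
         canTrackReturnsInterprocedurally(&F);
}

// Globals are only trackable when all uses are plain, non-volatile loads
// and stores within the module.
static bool shouldTrackGlobal(GlobalVariable &GV) {
  return canTrackGlobalVariableInterprocedurally(&GV);
}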
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index da058b1d3918..1c51523b1573 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -15,32 +15,32 @@
 #ifndef LLVM_ANALYSIS_VALUETRACKING_H
 #define LLVM_ANALYSIS_VALUETRACKING_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/Instruction.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/IR/Intrinsics.h"
+#include <cassert>
+#include <cstdint>
 namespace llvm {
-template <typename T> class ArrayRef;
- class APInt;
- class AddOperator;
- class AssumptionCache;
- class DataLayout;
- class DominatorTree;
- class GEPOperator;
- class Instruction;
- struct KnownBits;
- class Loop;
- class LoopInfo;
- class OptimizationRemarkEmitter;
- class MDNode;
- class StringRef;
- class TargetLibraryInfo;
- class Value;
-
- namespace Intrinsic {
- enum ID : unsigned;
- }
+
+class AddOperator;
+class APInt;
+class AssumptionCache;
+class DataLayout;
+class DominatorTree;
+class GEPOperator;
+class IntrinsicInst;
+struct KnownBits;
+class Loop;
+class LoopInfo;
+class MDNode;
+class OptimizationRemarkEmitter;
+class StringRef;
+class TargetLibraryInfo;
+class Value;
 
 /// Determine which bits of V are known to be either zero or one and return
 /// them in the KnownZero/KnownOne bit sets.
@@ -56,17 +56,20 @@ template <typename T> class ArrayRef;
                        const Instruction *CxtI = nullptr,
                        const DominatorTree *DT = nullptr,
                        OptimizationRemarkEmitter *ORE = nullptr);
+
 /// Returns the known bits rather than passing by reference.
 KnownBits computeKnownBits(const Value *V, const DataLayout &DL,
                            unsigned Depth = 0, AssumptionCache *AC = nullptr,
                            const Instruction *CxtI = nullptr,
                            const DominatorTree *DT = nullptr,
                            OptimizationRemarkEmitter *ORE = nullptr);
+
 /// Compute known bits from the range metadata.
 /// \p KnownZero the set of bits that are known to be zero
 /// \p KnownOne the set of bits that are known to be one
 void computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
                                        KnownBits &Known);
+
 /// Return true if LHS and RHS have no common bits set.
 bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
                          const DataLayout &DL,
@@ -86,7 +89,7 @@ template <typename T> class ArrayRef;
                        const DominatorTree *DT = nullptr);
 
 bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
-
+
 /// Return true if the given value is known to be non-zero when defined. For
 /// vectors, return true if every element is known to be non-zero when
 /// defined. For pointers, if the context instruction and dominator tree are
@@ -180,9 +183,13 @@ template <typename T> class ArrayRef;
 /// -0 --> true
 /// x > +0 --> true
 /// x < -0 --> false
-///
 bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
 
+/// Return true if the floating-point scalar value is not a NaN or if the
+/// floating-point vector value has no NaN elements. Return false if a value
+/// could ever be NaN.
+bool isKnownNeverNaN(const Value *V);
+
 /// Return true if we can prove that the specified FP value's sign bit is 0.
 ///
 /// NaN --> true/false (depending on the NaN's sign bit)
@@ -190,7 +197,6 @@ template <typename T> class ArrayRef;
 /// -0 --> false
 /// x > +0 --> true
 /// x < -0 --> false
-///
 bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
 
 /// If the specified value can be set by repeating the same byte in memory,
@@ -214,9 +220,9 @@ template <typename T> class ArrayRef;
 /// pointer plus a constant offset. Return the base and offset to the caller.
 Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
                                         const DataLayout &DL);
-static inline const Value *
-GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
-                                 const DataLayout &DL) {
+inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr,
+                                                     int64_t &Offset,
+                                                     const DataLayout &DL) {
   return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
                                           DL);
 }
@@ -231,8 +237,10 @@ template <typename T> class ArrayRef;
   /// ConstantDataArray pointer. nullptr indicates a zeroinitializer (a valid
   /// initializer, it just doesn't fit the ConstantDataArray interface).
   const ConstantDataArray *Array;
+
   /// Slice starts at this Offset.
   uint64_t Offset;
+
   /// Length of the slice.
   uint64_t Length;
@@ -242,14 +250,15 @@ template <typename T> class ArrayRef;
     Offset += Delta;
     Length -= Delta;
   }
+
   /// Convenience accessor for elements in the slice.
   uint64_t operator[](unsigned I) const {
     return Array==nullptr ? 0 : Array->getElementAsInteger(I + Offset);
   }
 };
 
-/// Returns true if the value \p V is a pointer into a ContantDataArray.
-/// If successful \p Index will point to a ConstantDataArray info object
+/// Returns true if the value \p V is a pointer into a ConstantDataArray.
+/// If successful \p Slice will point to a ConstantDataArray info object
 /// with an appropriate offset.
 bool getConstantDataArrayInfo(const Value *V, ConstantDataArraySlice &Slice,
                               unsigned ElementSize, uint64_t Offset = 0);
@@ -274,9 +283,8 @@ template <typename T> class ArrayRef;
 /// be stripped off.
 Value *GetUnderlyingObject(Value *V, const DataLayout &DL,
                            unsigned MaxLookup = 6);
-static inline const Value *GetUnderlyingObject(const Value *V,
-                                               const DataLayout &DL,
-                                               unsigned MaxLookup = 6) {
+inline const Value *GetUnderlyingObject(const Value *V, const DataLayout &DL,
+                                        unsigned MaxLookup = 6) {
   return GetUnderlyingObject(const_cast<Value *>(V), DL, MaxLookup);
 }
@@ -314,7 +322,7 @@ template <typename T> class ArrayRef;
 /// This is a wrapper around GetUnderlyingObjects and adds support for basic
 /// ptrtoint+arithmetic+inttoptr sequences.
-void getUnderlyingObjectsForCodeGen(const Value *V,
+bool getUnderlyingObjectsForCodeGen(const Value *V,
                                     SmallVectorImpl<Value *> &Objects,
                                     const DataLayout &DL);
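The void-to-bool change to getUnderlyingObjectsForCodeGen above means callers must now be prepared for identification to fail. A sketch of the intended calling pattern (illustrative only; treating a false return as "discard the partial set" is an assumption of this sketch, not something the diff states):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// On a false return the collected objects may be incomplete, so a cautious
// caller drops them and falls back to conservative aliasing assumptions.
static bool findObjectsOrGiveUp(const Value *V, const DataLayout &DL,
                                SmallVectorImpl<Value *> &Objects) {
  if (!getUnderlyingObjectsForCodeGen(V, Objects, DL)) {
    Objects.clear(); // assumption: never consume a partial answer
    return false;
  }
  return true;
}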
@@ -358,18 +366,9 @@ template <typename T> class ArrayRef;
 /// operands are not memory dependent.
 bool mayBeMemoryDependent(const Instruction &I);
-/// Return true if this pointer couldn't possibly be null by its definition.
-/// This returns true for allocas, non-extern-weak globals, and byval
-/// arguments.
-bool isKnownNonNull(const Value *V);
-
-/// Return true if this pointer couldn't possibly be null. If the context
-/// instruction and dominator tree are specified, perform context-sensitive
-/// analysis and return true if the pointer couldn't possibly be null at the
-/// specified instruction.
-bool isKnownNonNullAt(const Value *V,
-                      const Instruction *CtxI = nullptr,
-                      const DominatorTree *DT = nullptr);
+/// Return true if it is an intrinsic that cannot be speculated but also
+/// cannot trap.
+bool isAssumeLikeIntrinsic(const Instruction *I);
 
 /// Return true if it is valid to use the assumptions provided by an
 /// assume intrinsic, I, at the point in the control-flow identified by the
@@ -378,6 +377,7 @@ template <typename T> class ArrayRef;
                        const DominatorTree *DT = nullptr);
 
 enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };
+
 OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
                                              const Value *RHS,
                                              const DataLayout &DL,
@@ -466,6 +466,7 @@ template <typename T> class ArrayRef;
   SPF_ABS,     /// Absolute value
   SPF_NABS     /// Negated absolute value
 };
+
 /// \brief Behavior when a floating point min/max is given one NaN and one
 /// non-NaN as input.
 enum SelectPatternNaNBehavior {
@@ -476,6 +477,7 @@ template <typename T> class ArrayRef;
               /// it has been determined that no operands can
               /// be NaN).
 };
+
 struct SelectPatternResult {
   SelectPatternFlavor Flavor;
   SelectPatternNaNBehavior NaNBehavior; /// Only applicable if Flavor is
@@ -489,6 +491,7 @@ template <typename T> class ArrayRef;
     return !(SPF == SPF_UNKNOWN || SPF == SPF_ABS || SPF == SPF_NABS);
   }
 };
+
 /// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind
 /// and providing the out parameter results if we successfully match.
 ///
@@ -506,7 +509,7 @@ template <typename T> class ArrayRef;
 ///
 SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
                                        Instruction::CastOps *CastOp = nullptr);
-static inline SelectPatternResult
+inline SelectPatternResult
 matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS,
                    Instruction::CastOps *CastOp = nullptr) {
   Value *L = const_cast<Value*>(LHS);
@@ -528,11 +531,8 @@ template <typename T> class ArrayRef;
 ///  F  |  T  |  T
 /// (A)
 Optional<bool> isImpliedCondition(const Value *LHS, const Value *RHS,
-                                  const DataLayout &DL,
-                                  bool LHSIsFalse = false, unsigned Depth = 0,
-                                  AssumptionCache *AC = nullptr,
-                                  const Instruction *CxtI = nullptr,
-                                  const DominatorTree *DT = nullptr);
+                                  const DataLayout &DL, bool LHSIsTrue = true,
+                                  unsigned Depth = 0);
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_ANALYSIS_VALUETRACKING_H
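To close, two of the reworked entry points above in use (a sketch under assumptions: the helper functions are hypothetical, and the values passed in are arbitrary IR values):

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Recognize a signed-max idiom such as "select (icmp sgt A, B), A, B".
static bool isSMaxIdiom(Value *V) {
  Value *LHS, *RHS;
  return matchSelectPattern(V, LHS, RHS).Flavor == SPF_SMAX;
}

// Query the truth table documented above: given that LHS holds, is RHS known
// true, known false, or unknown (None)? Note the simplified signature: the
// analysis-context parameters are gone and LHSIsFalse became LHSIsTrue.
static void checkImplication(const Value *LHS, const Value *RHS,
                             const DataLayout &DL) {
  if (Optional<bool> Implied =
          isImpliedCondition(LHS, RHS, DL, /*LHSIsTrue=*/true))
    (void)*Implied; // true: RHS must hold; false: RHS cannot hold
}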
