Diffstat (limited to 'include/llvm/Analysis/TargetTransformInfo.h')
-rw-r--r--  include/llvm/Analysis/TargetTransformInfo.h  154
1 file changed, 134 insertions, 20 deletions
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 3913cc3f107c3..7570d22a803cb 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -25,6 +25,8 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/DataTypes.h"
 #include <functional>
@@ -34,7 +36,6 @@ namespace llvm {
 class Function;
 class GlobalValue;
 class Loop;
-class PreservedAnalyses;
 class Type;
 class User;
 class Value;
@@ -165,6 +166,14 @@ public:
   /// This overload allows specifying a set of candidate argument values.
   int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
 
+  /// \returns A value by which our inlining threshold should be multiplied.
+  /// This is primarily used to bump up the inlining threshold wholesale on
+  /// targets where calls are unusually expensive.
+  ///
+  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
+  /// individual classes of instructions would be better.
+  unsigned getInliningThresholdMultiplier() const;
+
   /// \brief Estimate the cost of an intrinsic when lowered.
   ///
   /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
@@ -260,6 +269,10 @@ public:
     // (set to UINT_MAX to disable). This does not apply in cases where the
     // loop is being fully unrolled.
     unsigned MaxCount;
+    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
+    /// applies even if full unrolling is selected. This allows a target to fall
+    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
+    unsigned FullUnrollMaxCount;
     /// Allow partial unrolling (unrolling of loops to expand the size of the
     /// loop body, not only to eliminate small constant-trip-count loops).
     bool Partial;
@@ -267,9 +280,14 @@ public:
     /// loop body even when the number of loop iterations is not known at
     /// compile time).
     bool Runtime;
+    /// Allow generation of a loop remainder (extra iterations after unroll).
+    bool AllowRemainder;
     /// Allow emitting expensive instructions (such as divisions) when computing
     /// the trip count of a loop for runtime unrolling.
     bool AllowExpensiveTripCount;
+    /// Apply loop unrolling to any kind of loop
+    /// (mainly to loops that fail runtime unrolling).
+    bool Force;
   };
 
   /// \brief Get target-customized preferences for the generic loop unrolling
@@ -313,8 +331,7 @@ public:
                              unsigned AddrSpace = 0) const;
 
   /// \brief Return true if the target supports masked load/store
-  /// AVX2 and AVX-512 targets allow masks for consecutive load and store for
-  /// 32 and 64 bit elements.
+  /// AVX2 and AVX-512 targets allow masks for consecutive load and store.
   bool isLegalMaskedStore(Type *DataType) const;
   bool isLegalMaskedLoad(Type *DataType) const;
@@ -362,6 +379,20 @@ public:
   /// \brief Enable matching of interleaved access groups.
   bool enableInterleavedAccessVectorization() const;
 
+  /// \brief Indicate that it is potentially unsafe to automatically vectorize
+  /// floating-point operations because the semantics of vector and scalar
+  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
+  /// does not support IEEE-754 denormal numbers, while depending on the
+  /// platform, scalar floating-point math does.
+  /// This applies to floating-point math operations and calls, not memory
+  /// operations, shuffles, or casts.
+  bool isFPVectorizationPotentiallyUnsafe() const;
+
+  /// \brief Determine if the target supports unaligned memory accesses.
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,
+                                      unsigned Alignment = 1,
+                                      bool *Fast = nullptr) const;
+
   /// \brief Return hardware support for population count.
   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
@@ -383,6 +414,16 @@ public:
                     Type *Ty) const;
   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                     Type *Ty) const;
+
+  /// \brief Return the expected cost for the given integer when optimising
+  /// for size. This is different from the other integer immediate cost
+  /// functions in that it is subtarget agnostic. This is useful when you e.g.
+  /// target one ISA such as AArch32 but smaller encodings could be possible
+  /// with another such as Thumb. This return value is used as a penalty when
+  /// the total cost for a constant is calculated (the bigger the cost, the
+  /// more beneficial constant hoisting is).
+  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+                            Type *Ty) const;
   /// @}
 
   /// \name Vector Target Information
@@ -416,6 +457,27 @@ public:
   /// \return The width of the largest scalar or vector register type.
   unsigned getRegisterBitWidth(bool Vector) const;
 
+  /// \return The bitwidth of the largest vector type that should be used to
+  /// load/store in the given address space.
+  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+
+  /// \return The size of a cache line in bytes.
+  unsigned getCacheLineSize() const;
+
+  /// \return How far before a load we should place the prefetch instruction.
+  /// This is currently measured in number of instructions.
+  unsigned getPrefetchDistance() const;
+
+  /// \return Some HW prefetchers can handle accesses up to a certain constant
+  /// stride. This is the minimum stride in bytes where it makes sense to start
+  /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
+  unsigned getMinPrefetchStride() const;
+
+  /// \return The maximum number of iterations to prefetch ahead. If the
+  /// required number of iterations is more than this number, no prefetching is
+  /// performed.
+  unsigned getMaxPrefetchIterationsAhead() const;
+
   /// \return The maximum interleave factor that any transform should try to
   /// perform for this target. This number depends on the level of parallelism
   /// and the number of execution units in the CPU.
@@ -438,6 +500,11 @@ public:
   /// zext, etc.
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
 
+  /// \return The expected cost of a sign- or zero-extended vector extract. Use
+  /// -1 to indicate that there is no information about the index value.
+  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
+                               unsigned Index = -1) const;
+
   /// \return The expected cost of control-flow related instructions such as
   /// Phi, Ret, Br.
   int getCFInstrCost(unsigned Opcode) const;
@@ -497,11 +564,11 @@ public:
 
   /// \returns The cost of Intrinsic instructions. Types analysis only.
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Type *> Tys) const;
+                            ArrayRef<Type *> Tys, FastMathFlags FMF) const;
 
   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args) const;
+                            ArrayRef<Value *> Args, FastMathFlags FMF) const;
 
   /// \returns The cost of Call instructions.
   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
@@ -569,6 +636,7 @@ public:
   virtual int getCallCost(const Function *F, int NumArgs) = 0;
   virtual int getCallCost(const Function *F,
                           ArrayRef<const Value *> Arguments) = 0;
+  virtual unsigned getInliningThresholdMultiplier() = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                ArrayRef<Type *> ParamTys) = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
@@ -599,9 +667,16 @@ public:
   virtual bool shouldBuildLookupTables() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
+  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
+  virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                              unsigned AddressSpace,
+                                              unsigned Alignment,
+                                              bool *Fast) = 0;
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
   virtual bool haveFastSqrt(Type *Ty) = 0;
   virtual int getFPOpCost(Type *Ty) = 0;
+  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+                                    Type *Ty) = 0;
   virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
   virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                             Type *Ty) = 0;
@@ -609,6 +684,11 @@ public:
                             Type *Ty) = 0;
   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
   virtual unsigned getRegisterBitWidth(bool Vector) = 0;
+  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) = 0;
+  virtual unsigned getCacheLineSize() = 0;
+  virtual unsigned getPrefetchDistance() = 0;
+  virtual unsigned getMinPrefetchStride() = 0;
+  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
   virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                           OperandValueKind Opd1Info,
@@ -618,6 +698,8 @@ public:
   virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                              Type *SubTp) = 0;
   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
+  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+                                       VectorType *VecTy, unsigned Index) = 0;
   virtual int getCFInstrCost(unsigned Opcode) = 0;
   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                  Type *CondTy) = 0;
@@ -639,9 +721,11 @@ public:
   virtual int getReductionCost(unsigned Opcode, Type *Ty,
                                bool IsPairwiseForm) = 0;
   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                    ArrayRef<Type *> Tys) = 0;
+                                    ArrayRef<Type *> Tys,
+                                    FastMathFlags FMF) = 0;
   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                    ArrayRef<Value *> Args) = 0;
+                                    ArrayRef<Value *> Args,
+                                    FastMathFlags FMF) = 0;
   virtual int getCallInstrCost(Function *F, Type *RetTy,
                                ArrayRef<Type *> Tys) = 0;
   virtual unsigned getNumberOfParts(Type *Tp) = 0;
@@ -684,6 +768,9 @@ public:
                   ArrayRef<const Value *> Arguments) override {
     return Impl.getCallCost(F, Arguments);
   }
+  unsigned getInliningThresholdMultiplier() override {
+    return Impl.getInliningThresholdMultiplier();
+  }
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef<Type *> ParamTys) override {
     return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
@@ -751,6 +838,14 @@ public:
   bool enableInterleavedAccessVectorization() override {
     return Impl.enableInterleavedAccessVectorization();
   }
+  bool isFPVectorizationPotentiallyUnsafe() override {
+    return Impl.isFPVectorizationPotentiallyUnsafe();
+  }
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+                                      unsigned Alignment, bool *Fast) override {
+    return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+                                               Alignment, Fast);
+  }
   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
     return Impl.getPopcntSupport(IntTyWidthInBit);
   }
@@ -758,6 +853,10 @@ public:
   int getFPOpCost(Type *Ty) override {
     return Impl.getFPOpCost(Ty);
   }
+  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+                            Type *Ty) override {
+    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
+  }
   int getIntImmCost(const APInt &Imm, Type *Ty) override {
     return Impl.getIntImmCost(Imm, Ty);
   }
@@ -775,6 +874,21 @@ public:
   unsigned getRegisterBitWidth(bool Vector) override {
     return Impl.getRegisterBitWidth(Vector);
   }
+
+  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) override {
+    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
+  }
+
+  unsigned getCacheLineSize() override {
+    return Impl.getCacheLineSize();
+  }
+  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
+  unsigned getMinPrefetchStride() override {
+    return Impl.getMinPrefetchStride();
+  }
+  unsigned getMaxPrefetchIterationsAhead() override {
+    return Impl.getMaxPrefetchIterationsAhead();
+  }
   unsigned getMaxInterleaveFactor(unsigned VF) override {
     return Impl.getMaxInterleaveFactor(VF);
   }
@@ -793,6 +907,10 @@ public:
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
     return Impl.getCastInstrCost(Opcode, Dst, Src);
   }
+  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
+                               unsigned Index) override {
+    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
+  }
   int getCFInstrCost(unsigned Opcode) override {
     return Impl.getCFInstrCost(Opcode);
   }
@@ -826,13 +944,14 @@ public:
                        bool IsPairwiseForm) override {
     return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
   }
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Type *> Tys) override {
-    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys);
+  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
+                            FastMathFlags FMF) override {
+    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
   }
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args) override {
-    return Impl.getIntrinsicInstrCost(ID, RetTy, Args);
+                            ArrayRef<Value *> Args,
+                            FastMathFlags FMF) override {
+    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
   }
   int getCallInstrCost(Function *F, Type *RetTy,
                        ArrayRef<Type *> Tys) override {
@@ -876,16 +995,10 @@ TargetTransformInfo::TargetTransformInfo(T Impl)
 /// is done in a subtarget specific way and LLVM supports compiling different
 /// functions targeting different subtargets in order to support runtime
 /// dispatch according to the observed subtarget.
-class TargetIRAnalysis {
+class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
 public:
   typedef TargetTransformInfo Result;
 
-  /// \brief Opaque, unique identifier for this analysis pass.
-  static void *ID() { return (void *)&PassID; }
-
-  /// \brief Provide access to a name for this pass for debugging purposes.
-  static StringRef name() { return "TargetIRAnalysis"; }
-
   /// \brief Default construct a target IR analysis.
   ///
   /// This will use the module's datalayout to construct a baseline
@@ -912,9 +1025,10 @@ public:
     return *this;
  }
 
-  Result run(const Function &F);
+  Result run(const Function &F, AnalysisManager<Function> &);
 
 private:
+  friend AnalysisInfoMixin<TargetIRAnalysis>;
   static char PassID;
 
   /// \brief The callback used to produce a result.
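A few hedged sketches of how clients might exercise the new hooks follow; they are editorial illustrations written against the signatures above, not code from the commit.

The inlining multiplier scales the whole threshold rather than the cost of individual instructions, which is exactly the bluntness the TODO in the header acknowledges. A minimal sketch of a caller; adjustedInlineThreshold and BaseThreshold are hypothetical names:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Scale a base inlining threshold by the target's multiplier. Targets where
// calls are unusually expensive return a value greater than 1.
unsigned adjustedInlineThreshold(const TargetTransformInfo &TTI,
                                 unsigned BaseThreshold) {
  return BaseThreshold * TTI.getInliningThresholdMultiplier();
}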
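The new UnrollingPreferences fields interact: FullUnrollMaxCount caps full unrolling specifically, AllowRemainder permits an epilogue loop for leftover iterations, and Force overrides the unroller's usual profitability checks. A sketch of filling them in; the helper and the concrete values are illustrative, not taken from any in-tree target:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical example values for the new unrolling knobs.
void setExampleUnrollPrefs(TargetTransformInfo::UnrollingPreferences &UP) {
  UP.MaxCount = 8;            // cap any unroll factor at 8x (UINT_MAX = off)
  UP.FullUnrollMaxCount = 16; // above 16x, fall back from full to Partial
  UP.Partial = true;          // allow partial unrolling of the loop body
  UP.Runtime = true;          // allow unrolling with unknown trip counts
  UP.AllowRemainder = true;   // permit a remainder loop for spare iterations
  UP.Force = false;           // don't unroll loops that fail the usual checks
}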
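isFPVectorizationPotentiallyUnsafe and allowsMisalignedMemoryAccesses are both legality-style queries that a vectorizer can combine. A sketch with a hypothetical helper; only the two TTI calls come from the declarations above:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Decide whether a misaligned vector FP access is worth forming.
bool mayVectorizeFPAccess(const TargetTransformInfo &TTI, unsigned VecBits,
                          unsigned AddrSpace, unsigned Align) {
  // Vector FP math may not match scalar semantics (e.g. ARMv7 NEON flushes
  // denormals to zero), so back off when the target flags it as unsafe.
  if (TTI.isFPVectorizationPotentiallyUnsafe())
    return false;
  // Require the misaligned access to be both legal and fast on this target.
  bool Fast = false;
  return TTI.allowsMisalignedMemoryAccesses(VecBits, AddrSpace, Align, &Fast) &&
         Fast;
}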
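The four prefetch hooks are meant to be consumed together by a software-prefetch pass: the distance (in instructions) says how far ahead to prefetch, the stride floor filters out accesses the hardware prefetcher already covers, and the iteration cap bounds the lookahead. An illustrative combination; the function and its iteration math are simplified stand-ins, not LLVM's actual pass logic:

#include "llvm/Analysis/TargetTransformInfo.h"
#include <algorithm>

using namespace llvm;

bool worthPrefetching(const TargetTransformInfo &TTI, unsigned StrideBytes,
                      unsigned TripCount, unsigned InstsPerIter) {
  // No useful prefetch distance means SW prefetches gain nothing here, and
  // small strides are already handled by the HW prefetcher.
  if (TTI.getPrefetchDistance() == 0 ||
      StrideBytes < TTI.getMinPrefetchStride())
    return false;
  // Convert the instruction-based distance into loop iterations, clamped to
  // the furthest lookahead the target supports.
  unsigned ItersAhead =
      std::max(1u, TTI.getPrefetchDistance() / std::max(1u, InstsPerIter));
  ItersAhead = std::min(ItersAhead, TTI.getMaxPrefetchIterationsAhead());
  return ItersAhead < TripCount;
}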
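getExtractWithExtendCost folds a vector extract and its sign- or zero-extension into one query, so a target that can extract directly into a wide register may report the pair as cheaper than the two operations costed separately. A small usage sketch; the function name is hypothetical and the types are built inline:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Cost of extracting lane 0 of a <4 x i32> and sign-extending it to i64.
int extractSExtCost(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  VectorType *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4);
  Type *DstTy = Type::getInt64Ty(Ctx);
  // Pass -1 (the default) instead of 0 when the lane is not known.
  return TTI.getExtractWithExtendCost(Instruction::SExt, DstTy, VecTy, 0);
}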
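Threading FastMathFlags through getIntrinsicInstrCost (together with the new llvm/IR/Operator.h include at the top of the file, which provides FastMathFlags) lets targets price relaxed-FP intrinsics differently from strict ones. A sketch with a hypothetical helper; setUnsafeAlgebra is this era's spelling of fully relaxed FP math:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Cost of a sqrt intrinsic under relaxed floating-point semantics.
int fastSqrtCost(const TargetTransformInfo &TTI, Type *FloatTy) {
  FastMathFlags FMF;
  FMF.setUnsafeAlgebra(); // relaxed FP; a strict query would leave FMF empty
  return TTI.getIntrinsicInstrCost(Intrinsic::sqrt, FloatTy, {FloatTy}, FMF);
}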
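Finally, the TargetIRAnalysis change is a new-pass-manager migration: AnalysisInfoMixin now supplies the ID() and name() boilerplate that was deleted (hence the friend declaration granting it access to PassID), and run() takes the analysis manager as a second parameter. A minimal sketch of fetching the result the way a new-PM function pass would; the surrounding helper is hypothetical:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Retrieve the TargetTransformInfo result for F through the function
// analysis manager, as a pass would inside its run() method.
TargetTransformInfo &getTTIFor(Function &F, FunctionAnalysisManager &FAM) {
  return FAM.getResult<TargetIRAnalysis>(F);
}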