Diffstat (limited to 'include/llvm/Analysis/TargetTransformInfo.h')
-rw-r--r--  include/llvm/Analysis/TargetTransformInfo.h | 191
1 file changed, 157 insertions(+), 34 deletions(-)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 24edd3826a2e..c20f20cfbe4d 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -23,21 +23,28 @@
 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

 #include "llvm/ADT/Optional.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/DataTypes.h"
 #include <functional>

 namespace llvm {

+namespace Intrinsic {
+enum ID : unsigned;
+}
+
 class Function;
 class GlobalValue;
+class IntrinsicInst;
+class LoadInst;
 class Loop;
-class ScalarEvolution;
 class SCEV;
+class ScalarEvolution;
+class StoreInst;
+class SwitchInst;
 class Type;
 class User;
 class Value;
@@ -107,6 +114,37 @@ public:
   /// \name Generic Target Information
   /// @{

+  /// \brief The kind of cost model.
+  ///
+  /// There are several different cost models that can be customized by the
+  /// target. The normalization of each cost model may be target specific.
+  enum TargetCostKind {
+    TCK_RecipThroughput, ///< Reciprocal throughput.
+    TCK_Latency,         ///< The latency of instruction.
+    TCK_CodeSize         ///< Instruction code size.
+  };
+
+  /// \brief Query the cost of a specified instruction.
+  ///
+  /// Clients should use this interface to query the cost of an existing
+  /// instruction. The instruction must have a valid parent (basic block).
+  ///
+  /// Note, this method does not cache the cost calculation and it
+  /// can be expensive in some cases.
+  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
+    switch (kind){
+    case TCK_RecipThroughput:
+      return getInstructionThroughput(I);
+
+    case TCK_Latency:
+      return getInstructionLatency(I);
+
+    case TCK_CodeSize:
+      return getUserCost(I);
+    }
+    llvm_unreachable("Unknown instruction cost kind");
+  }
+
   /// \brief Underlying constants for 'cost' values in this interface.
   ///
   /// Many APIs in this interface return a cost. This enum defines the
@@ -382,6 +420,8 @@ public:
     bool UpperBound;
     /// Allow peeling off loop iterations for loops with low dynamic tripcount.
     bool AllowPeeling;
+    /// Allow unrolling of all the iterations of the runtime loop remainder.
+    bool UnrollRemainder;
   };

   /// \brief Get target-customized preferences for the generic loop unrolling
@@ -420,10 +460,12 @@ public:
   /// this target, for a load/store of the specified type.
   /// The type may be VoidTy, in which case only return true if the addressing
   /// mode is legal for a load/store of any legal type.
+  /// If target returns true in LSRWithInstrQueries(), I may be valid.
   /// TODO: Handle pre/postinc as well.
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace = 0) const;
+                             unsigned AddrSpace = 0,
+                             Instruction *I = nullptr) const;

   /// \brief Return true if LSR cost of C1 is lower than C1.
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
@@ -440,6 +482,20 @@ public:
   bool isLegalMaskedScatter(Type *DataType) const;
   bool isLegalMaskedGather(Type *DataType) const;

+  /// Return true if the target has a unified operation to calculate division
+  /// and remainder. If so, the additional implicit multiplication and
+  /// subtraction required to calculate a remainder from division are free. This
+  /// can enable more aggressive transformations for division and remainder than
+  /// would typically be allowed using throughput or size cost models.
+  bool hasDivRemOp(Type *DataType, bool IsSigned) const;
+
+  /// Return true if the given instruction (assumed to be a memory access
+  /// instruction) has a volatile variant. If that's the case then we can avoid
+  /// addrspacecast to generic AS for volatile loads/stores. Default
+  /// implementation returns false, which prevents address space inference for
+  /// volatile loads/stores.
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
+
   /// Return true if target doesn't mind addresses in vectors.
   bool prefersVectorizedAddressing() const;

@@ -453,12 +509,12 @@ public:
                            bool HasBaseReg, int64_t Scale,
                            unsigned AddrSpace = 0) const;

-  /// \brief Return true if target supports the load / store
-  /// instruction with the given Offset on the form reg + Offset. It
-  /// may be that Offset is too big for a certain type (register
-  /// class).
-  bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const;
-
+  /// \brief Return true if the loop strength reduce pass should make
+  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
+  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
+  /// immediate offset and no index register.
+  bool LSRWithInstrQueries() const;
+
   /// \brief Return true if it's free to truncate a value of type Ty1 to type
   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
   /// by referencing its sub-register AX.
@@ -498,8 +554,13 @@ public:
   /// \brief Don't restrict interleaved unrolling to small loops.
   bool enableAggressiveInterleaving(bool LoopHasReductions) const;

-  /// \brief Enable inline expansion of memcmp
-  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const;
+  /// \brief If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
+  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
+  struct MemCmpExpansionOptions {
+    // The list of available load sizes (in bytes), sorted in decreasing order.
+    SmallVector<unsigned, 8> LoadSizes;
+  };
+  const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;

   /// \brief Enable matching of interleaved access groups.
   bool enableInterleavedAccessVectorization() const;
@@ -525,6 +586,12 @@ public:
   /// \brief Return true if the hardware has a fast square-root instruction.
   bool haveFastSqrt(Type *Ty) const;

+  /// Return true if it is faster to check if a floating-point value is NaN
+  /// (or not-NaN) versus a comparison against a constant FP zero value.
+  /// Targets should override this if materializing a 0.0 for comparison is
+  /// generally as cheap as checking for ordered/unordered.
+  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
+
   /// \brief Return the expected cost of supporting the floating point operation
   /// of the specified type.
   int getFPOpCost(Type *Ty) const;
@@ -599,6 +666,22 @@ public:
   /// \return The size of a cache line in bytes.
   unsigned getCacheLineSize() const;

+  /// The possible cache levels
+  enum class CacheLevel {
+    L1D,   // The L1 data cache
+    L2D,   // The L2 data cache
+
+    // We currently do not model L3 caches, as their sizes differ widely between
+    // microarchitectures. Also, we currently do not have a use for L3 cache
+    // size modeling yet.
+  };
+
+  /// \return The size of the cache level in bytes, if available.
+  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
+
+  /// \return The associativity of the cache level, if available.
+  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
+
   /// \return How much before a load we should place the prefetch instruction.
   /// This is currently measured in number of instructions.
   unsigned getPrefetchDistance() const;
@@ -699,11 +782,14 @@ public:
   ///
   /// Pairwise:
   ///  (v0, v1, v2, v3)
-  ///  ((v0+v1), (v2, v3), undef, undef)
+  ///  ((v0+v1), (v2+v3), undef, undef)
   /// Split:
   ///  (v0, v1, v2, v3)
   ///  ((v0+v2), (v1+v3), undef, undef)
-  int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const;
+  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+                                 bool IsPairwiseForm) const;
+  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
+                             bool IsUnsigned) const;

   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   /// Three cases are handled: 1. scalar instruction 2. vector instruction
@@ -776,12 +862,6 @@ public:
                                          unsigned SrcAlign,
                                          unsigned DestAlign) const;

-  /// \returns True if we want to test the new memcpy lowering functionality in
-  /// Transform/Utils.
-  /// Temporary. Will be removed once we move to the new functionality and
-  /// remove the old.
-  bool useWideIRMemcpyLoopLowering() const;
-
   /// \returns True if the two functions have compatible attributes for inlining
   /// purposes.
   bool areInlineCompatible(const Function *Caller,
@@ -838,6 +918,14 @@ public:
   /// @}

 private:
+  /// \brief Estimate the latency of specified instruction.
+  /// Returns 1 as the default value.
+  int getInstructionLatency(const Instruction *I) const;
+
+  /// \brief Returns the expected throughput cost of the instruction.
+  /// Returns -1 if the cost is unknown.
+  int getInstructionThroughput(const Instruction *I) const;
+
   /// \brief The abstract base class used to type erase specific TTI
   /// implementations.
   class Concept;
@@ -882,18 +970,21 @@ public:
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale,
-                                     unsigned AddrSpace) = 0;
+                                     unsigned AddrSpace,
+                                     Instruction *I) = 0;
   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                              TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType) = 0;
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
   virtual bool isLegalMaskedGather(Type *DataType) = 0;
+  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
+  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
   virtual bool prefersVectorizedAddressing() = 0;
   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                    int64_t BaseOffset, bool HasBaseReg,
                                    int64_t Scale, unsigned AddrSpace) = 0;
-  virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
+  virtual bool LSRWithInstrQueries() = 0;
   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
   virtual bool isProfitableToHoist(Instruction *I) = 0;
   virtual bool isTypeLegal(Type *Ty) = 0;
@@ -907,7 +998,8 @@ public:
                                            unsigned VF) = 0;
   virtual bool supportsEfficientVectorElementLoadStore() = 0;
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
-  virtual bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) = 0;
+  virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
+      bool IsZeroCmp) const = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
@@ -917,6 +1009,7 @@ public:
                                               bool *Fast) = 0;
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
   virtual bool haveFastSqrt(Type *Ty) = 0;
+  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
   virtual int getFPOpCost(Type *Ty) = 0;
   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                     Type *Ty) = 0;
@@ -931,6 +1024,8 @@ public:
   virtual bool shouldConsiderAddressTypePromotion(
       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
   virtual unsigned getCacheLineSize() = 0;
+  virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
+  virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
   virtual unsigned getPrefetchDistance() = 0;
   virtual unsigned getMinPrefetchStride() = 0;
   virtual unsigned getMaxPrefetchIterationsAhead() = 0;
@@ -965,8 +1060,10 @@ public:
                                          ArrayRef<unsigned> Indices,
                                          unsigned Alignment,
                                          unsigned AddressSpace) = 0;
-  virtual int getReductionCost(unsigned Opcode, Type *Ty,
-                               bool IsPairwiseForm) = 0;
+  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+                                         bool IsPairwiseForm) = 0;
+  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
+                                     bool IsPairwiseForm, bool IsUnsigned) = 0;
   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                       ArrayRef<Type *> Tys, FastMathFlags FMF,
                       unsigned ScalarizationCostPassed) = 0;
@@ -1009,6 +1106,7 @@ public:
   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                      ReductionFlags) const = 0;
   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+  virtual int getInstructionLatency(const Instruction *I) = 0;
 };

 template <typename T>
@@ -1085,9 +1183,10 @@ public:
   }
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) override {
+                             unsigned AddrSpace,
+                             Instruction *I) override {
     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
-                                      Scale, AddrSpace);
+                                      Scale, AddrSpace, I);
   }
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                      TargetTransformInfo::LSRCost &C2) override {
@@ -1105,6 +1204,12 @@ public:
   bool isLegalMaskedGather(Type *DataType) override {
     return Impl.isLegalMaskedGather(DataType);
   }
+  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
+    return Impl.hasDivRemOp(DataType, IsSigned);
+  }
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
+    return Impl.hasVolatileVariant(I, AddrSpace);
+  }
   bool prefersVectorizedAddressing() override {
     return Impl.prefersVectorizedAddressing();
   }
@@ -1114,8 +1219,8 @@ public:
     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace);
   }
-  bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
-    return Impl.isFoldableMemAccessOffset(I, Offset);
+  bool LSRWithInstrQueries() override {
+    return Impl.LSRWithInstrQueries();
   }
   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
     return Impl.isTruncateFree(Ty1, Ty2);
   }
@@ -1148,8 +1253,9 @@ public:
   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
     return Impl.enableAggressiveInterleaving(LoopHasReductions);
   }
-  bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) override {
-    return Impl.expandMemCmp(I, MaxLoadSize);
+  const MemCmpExpansionOptions *enableMemCmpExpansion(
+      bool IsZeroCmp) const override {
+    return Impl.enableMemCmpExpansion(IsZeroCmp);
   }
   bool enableInterleavedAccessVectorization() override {
     return Impl.enableInterleavedAccessVectorization();
   }
@@ -1168,6 +1274,10 @@ public:
   }
   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

+  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
+    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
+  }
+
   int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

   int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
@@ -1202,6 +1312,12 @@ public:
   unsigned getCacheLineSize() override {
     return Impl.getCacheLineSize();
   }
+  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
+    return Impl.getCacheSize(Level);
+  }
+  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
+    return Impl.getCacheAssociativity(Level);
+  }
   unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
   unsigned getMinPrefetchStride() override {
     return Impl.getMinPrefetchStride();
@@ -1267,10 +1383,14 @@ public:
     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                            Alignment, AddressSpace);
   }
-  int getReductionCost(unsigned Opcode, Type *Ty,
-                       bool IsPairwiseForm) override {
-    return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
+  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+                                 bool IsPairwiseForm) override {
+    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
   }
+  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
+                             bool IsPairwiseForm, bool IsUnsigned) override {
+    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
+  }
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
       FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
@@ -1360,6 +1480,9 @@ public:
   bool shouldExpandReduction(const IntrinsicInst *II) const override {
     return Impl.shouldExpandReduction(II);
   }
+  int getInstructionLatency(const Instruction *I) override {
+    return Impl.getInstructionLatency(I);
+  }
 };

 template <typename T>
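Note on the new cost-query entry point: getInstructionCost() in the first hunk above is a thin dispatcher over the three cost models (reciprocal throughput, latency, code size). As a minimal illustration of how a client could call it, here is a hedged sketch; the helper name estimateBlockCodeSize and the surrounding setup are invented for this example and are not part of the patch:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

// Sum the size-model cost of every instruction in a basic block.
// TCK_CodeSize routes to getUserCost(); TCK_RecipThroughput and TCK_Latency
// would select the throughput and latency models instead.
static int estimateBlockCodeSize(const llvm::BasicBlock &BB,
                                 const llvm::TargetTransformInfo &TTI) {
  int Cost = 0;
  for (const llvm::Instruction &I : BB)
    Cost += TTI.getInstructionCost(&I,
                                   llvm::TargetTransformInfo::TCK_CodeSize);
  return Cost;
}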

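Similarly, the memcmp-expansion hook changes from a bool query to returning a MemCmpExpansionOptions pointer: a target opts in by returning a non-null options struct listing its load sizes in decreasing order, and returning nullptr keeps expansion disabled. The class name MyTargetTTIImpl and the chosen sizes below are hypothetical; the sketch only illustrates the shape of an override against the interface added in this diff:

#include "llvm/Analysis/TargetTransformInfo.h"

class MyTargetTTIImpl {
public:
  // Hypothetical target-side override: advertise 8/4/2/1-byte loads for
  // inline memcmp expansion, regardless of whether the result is only
  // compared against zero (IsZeroCmp).
  const llvm::TargetTransformInfo::MemCmpExpansionOptions *
  enableMemCmpExpansion(bool IsZeroCmp) const {
    static const auto Options = [] {
      llvm::TargetTransformInfo::MemCmpExpansionOptions O;
      O.LoadSizes.push_back(8); // sizes must be sorted in decreasing order
      O.LoadSizes.push_back(4);
      O.LoadSizes.push_back(2);
      O.LoadSizes.push_back(1);
      return O;
    }();
    return &Options;
  }
};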