Diffstat (limited to 'include/llvm/Analysis/TargetTransformInfo.h')
 include/llvm/Analysis/TargetTransformInfo.h | 154 ++++++++++++++++++++++-----
 1 file changed, 134 insertions(+), 20 deletions(-)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 3913cc3f107c3..7570d22a803cb 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -25,6 +25,8 @@
#include "llvm/ADT/Optional.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
#include <functional>
@@ -34,7 +36,6 @@ namespace llvm {
class Function;
class GlobalValue;
class Loop;
-class PreservedAnalyses;
class Type;
class User;
class Value;
@@ -165,6 +166,14 @@ public:
/// This overload allows specifying a set of candidate argument values.
int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
+ /// \returns A value by which our inlining threshold should be multiplied.
+ /// This is primarily used to bump up the inlining threshold wholesale on
+ /// targets where calls are unusually expensive.
+ ///
+ /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
+ /// individual classes of instructions would be better.
+ unsigned getInliningThresholdMultiplier() const;
+
/// \brief Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
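For context, the inliner is expected to scale its threshold by this factor; a minimal sketch of how a cost-model client might apply the new hook (the helper name is illustrative, not the actual inliner code):
// Sketch: scale a base inlining threshold by the target's multiplier before
// comparing it against a computed call-site cost. adjustThreshold() is a
// hypothetical helper, not LLVM API.
int adjustThreshold(int BaseThreshold, const TargetTransformInfo &TTI) {
  // getInliningThresholdMultiplier() is 1 by default; targets where calls
  // are unusually expensive can return a larger factor.
  return BaseThreshold * static_cast<int>(TTI.getInliningThresholdMultiplier());
}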
@@ -260,6 +269,10 @@ public:
// (set to UINT_MAX to disable). This does not apply in cases where the
// loop is being fully unrolled.
unsigned MaxCount;
+ /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
+ /// applies even if full unrolling is selected. This allows a target to fall
+ /// back to partial unrolling if the full unroll count exceeds
+ /// FullUnrollMaxCount.
+ unsigned FullUnrollMaxCount;
/// Allow partial unrolling (unrolling of loops to expand the size of the
/// loop body, not only to eliminate small constant-trip-count loops).
bool Partial;
@@ -267,9 +280,14 @@ public:
/// loop body even when the number of loop iterations is not known at
/// compile time).
bool Runtime;
+ /// Allow generation of a loop remainder (extra iterations after unroll).
+ bool AllowRemainder;
/// Allow emitting expensive instructions (such as divisions) when computing
/// the trip count of a loop for runtime unrolling.
bool AllowExpensiveTripCount;
+ /// Apply loop unrolling to any kind of loop
+ /// (mainly loops that fail runtime unrolling).
+ bool Force;
};
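A target's getUnrollingPreferences override might populate the new fields roughly as follows (a sketch for a hypothetical target; the concrete values are illustrative only):
// Sketch: hypothetical target override using the new unrolling knobs.
void MyTargetTTIImpl::getUnrollingPreferences(Loop *L,
                                              TTI::UnrollingPreferences &UP) {
  UP.Partial = true;          // allow partial unrolling
  UP.Runtime = true;          // allow runtime unrolling
  UP.AllowRemainder = true;   // permit a remainder loop for leftover iterations
  UP.FullUnrollMaxCount = 16; // above this, fall back to partial unrolling
}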
/// \brief Get target-customized preferences for the generic loop unrolling
@@ -313,8 +331,7 @@ public:
unsigned AddrSpace = 0) const;
/// \brief Return true if the target supports masked load/store
- /// AVX2 and AVX-512 targets allow masks for consecutive load and store for
- /// 32 and 64 bit elements.
+ /// AVX2 and AVX-512 targets allow masks for consecutive load and store
bool isLegalMaskedStore(Type *DataType) const;
bool isLegalMaskedLoad(Type *DataType) const;
@@ -362,6 +379,20 @@ public:
/// \brief Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
+ /// \brief Indicate that it is potentially unsafe to automatically vectorize
+ /// floating-point operations because the semantics of vector and scalar
+ /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
+ /// does not support IEEE-754 denormal numbers, while depending on the
+ /// platform, scalar floating-point math does.
+ /// This applies to floating-point math operations and calls, not memory
+ /// operations, shuffles, or casts.
+ bool isFPVectorizationPotentiallyUnsafe() const;
+
+ /// \brief Determine if the target supports unaligned memory accesses.
+ bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,
+ unsigned Alignment = 1,
+ bool *Fast = nullptr) const;
+
/// \brief Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
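A vectorization client could combine these queries before widening memory accesses or FP math; a minimal sketch (the helper name is made up for illustration):
// Sketch: decide whether a misaligned vector access of BitWidth bits is both
// legal and fast in the given address space. Not actual vectorizer code.
bool isCheapMisalignedAccess(const TargetTransformInfo &TTI, unsigned BitWidth,
                             unsigned AddrSpace, unsigned Align) {
  bool Fast = false;
  if (!TTI.allowsMisalignedMemoryAccesses(BitWidth, AddrSpace, Align, &Fast))
    return false; // misaligned access of this width is not supported at all
  return Fast;    // supported, and the target reports it as fast
}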
@@ -383,6 +414,16 @@ public:
Type *Ty) const;
int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const;
+
+ /// \brief Return the expected cost of the given integer immediate when
+ /// optimising for size. This differs from the other integer immediate cost
+ /// functions in that it is subtarget agnostic. It is useful when, e.g., the
+ /// target is one ISA such as AArch32 but smaller encodings may be possible
+ /// with another such as Thumb. The return value is used as a penalty when
+ /// the total cost for a constant is calculated (the bigger the cost, the
+ /// more beneficial constant hoisting is).
+ int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) const;
/// @}
/// \name Vector Target Information
@@ -416,6 +457,27 @@ public:
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
+ /// \return The bitwidth of the largest vector type that should be used to
+ /// load/store in the given address space.
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+
+ /// \return The size of a cache line in bytes.
+ unsigned getCacheLineSize() const;
+
+ /// \return How far ahead of a load we should place the prefetch
+ /// instruction. This is currently measured in number of instructions.
+ unsigned getPrefetchDistance() const;
+
+ /// \return The minimum stride, in bytes, at which it makes sense to start
+ /// adding SW prefetches; some HW prefetchers can handle strided accesses up
+ /// to a certain constant stride. The default is 1, i.e. prefetch with any
+ /// stride.
+ unsigned getMinPrefetchStride() const;
+
+ /// \return The maximum number of iterations to prefetch ahead. If the
+ /// required number of iterations is more than this number, no prefetching is
+ /// performed.
+ unsigned getMaxPrefetchIterationsAhead() const;
+
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
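Taken together, the cache and prefetch queries let a software-prefetching pass decide whether and how far ahead to prefetch. A rough sketch of how they might be combined (this is not the actual LoopDataPrefetch logic):
// Sketch: for a strided access, decide whether to emit SW prefetches and how
// many iterations ahead to aim them.
bool shouldPrefetch(const TargetTransformInfo &TTI, unsigned StrideBytes,
                    unsigned InstrsPerIteration, unsigned &ItersAhead) {
  if (InstrsPerIteration == 0 || StrideBytes < TTI.getMinPrefetchStride())
    return false; // the HW prefetcher is expected to cover this stride
  // Convert the prefetch distance (in instructions) into loop iterations.
  ItersAhead = TTI.getPrefetchDistance() / InstrsPerIteration;
  return ItersAhead <= TTI.getMaxPrefetchIterationsAhead();
}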
@@ -438,6 +500,11 @@ public:
/// zext, etc.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const;
+ /// \return The expected cost of a sign- or zero-extended vector extract. Use
+ /// -1 to indicate that there is no information about the index value.
+ int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
+ unsigned Index = -1) const;
+
/// \return The expected cost of control-flow related instructions such as
/// Phi, Ret, Br.
int getCFInstrCost(unsigned Opcode) const;
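This hook models patterns such as a sign- or zero-extend of an extractelement, which some targets (e.g. AArch64) can fold into a single lane-extracting extend. A query could look like this (the types are illustrative):
// Sketch: cost of extracting lane 1 of a <4 x i16> vector and sign-extending
// the result to i32 as one combined operation.
int costLaneExtend(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  VectorType *VecTy = VectorType::get(Type::getInt16Ty(Ctx), 4);
  Type *DstTy = Type::getInt32Ty(Ctx);
  return TTI.getExtractWithExtendCost(Instruction::SExt, DstTy, VecTy,
                                      /*Index=*/1);
}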
@@ -497,11 +564,11 @@ public:
/// \returns The cost of Intrinsic instructions. Types analysis only.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) const;
+ ArrayRef<Type *> Tys, FastMathFlags FMF) const;
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args) const;
+ ArrayRef<Value *> Args, FastMathFlags FMF) const;
/// \returns The cost of Call instructions.
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
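Callers of getIntrinsicInstrCost are now expected to pass the fast-math flags of the call being costed, so a target can, for example, price a math intrinsic differently under relaxed FP semantics. A hedged sketch of such a call site:
// Sketch: cost an intrinsic call, forwarding its fast-math flags (if any) to
// the target's cost model. Not taken verbatim from an in-tree pass.
int costIntrinsicCall(const TargetTransformInfo &TTI, IntrinsicInst *II) {
  FastMathFlags FMF;
  if (auto *FPMO = dyn_cast<FPMathOperator>(II))
    FMF = FPMO->getFastMathFlags();
  SmallVector<Value *, 4> Args(II->arg_operands().begin(),
                               II->arg_operands().end());
  return TTI.getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
                                   FMF);
}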
@@ -569,6 +636,7 @@ public:
virtual int getCallCost(const Function *F, int NumArgs) = 0;
virtual int getCallCost(const Function *F,
ArrayRef<const Value *> Arguments) = 0;
+ virtual unsigned getInliningThresholdMultiplier() = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys) = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
@@ -599,9 +667,16 @@ public:
virtual bool shouldBuildLookupTables() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
+ virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
+ virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+ unsigned AddressSpace,
+ unsigned Alignment,
+ bool *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual int getFPOpCost(Type *Ty) = 0;
+ virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) = 0;
virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
@@ -609,6 +684,11 @@ public:
Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) = 0;
+ virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) = 0;
+ virtual unsigned getCacheLineSize() = 0;
+ virtual unsigned getPrefetchDistance() = 0;
+ virtual unsigned getMinPrefetchStride() = 0;
+ virtual unsigned getMaxPrefetchIterationsAhead() = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -618,6 +698,8 @@ public:
virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0;
+ virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+ VectorType *VecTy, unsigned Index) = 0;
virtual int getCFInstrCost(unsigned Opcode) = 0;
virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) = 0;
@@ -639,9 +721,11 @@ public:
virtual int getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) = 0;
+ ArrayRef<Type *> Tys,
+ FastMathFlags FMF) = 0;
virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args) = 0;
+ ArrayRef<Value *> Args,
+ FastMathFlags FMF) = 0;
virtual int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
@@ -684,6 +768,9 @@ public:
ArrayRef<const Value *> Arguments) override {
return Impl.getCallCost(F, Arguments);
}
+ unsigned getInliningThresholdMultiplier() override {
+ return Impl.getInliningThresholdMultiplier();
+ }
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> ParamTys) override {
return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
@@ -751,6 +838,14 @@ public:
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
}
+ bool isFPVectorizationPotentiallyUnsafe() override {
+ return Impl.isFPVectorizationPotentiallyUnsafe();
+ }
+ bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+ unsigned Alignment, bool *Fast) override {
+ return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+ Alignment, Fast);
+ }
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
return Impl.getPopcntSupport(IntTyWidthInBit);
}
@@ -758,6 +853,10 @@ public:
int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
+ int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty) override {
+ return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
+ }
int getIntImmCost(const APInt &Imm, Type *Ty) override {
return Impl.getIntImmCost(Imm, Ty);
}
@@ -775,6 +874,21 @@ public:
unsigned getRegisterBitWidth(bool Vector) override {
return Impl.getRegisterBitWidth(Vector);
}
+
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) override {
+ return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
+ }
+
+ unsigned getCacheLineSize() override {
+ return Impl.getCacheLineSize();
+ }
+ unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
+ unsigned getMinPrefetchStride() override {
+ return Impl.getMinPrefetchStride();
+ }
+ unsigned getMaxPrefetchIterationsAhead() override {
+ return Impl.getMaxPrefetchIterationsAhead();
+ }
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
@@ -793,6 +907,10 @@ public:
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override {
return Impl.getCastInstrCost(Opcode, Dst, Src);
}
+ int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
+ unsigned Index) override {
+ return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
+ }
int getCFInstrCost(unsigned Opcode) override {
return Impl.getCFInstrCost(Opcode);
}
@@ -826,13 +944,14 @@ public:
bool IsPairwiseForm) override {
return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm);
}
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Tys);
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
+ FastMathFlags FMF) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
}
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Args);
+ ArrayRef<Value *> Args,
+ FastMathFlags FMF) override {
+ return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF);
}
int getCallInstrCost(Function *F, Type *RetTy,
ArrayRef<Type *> Tys) override {
@@ -876,16 +995,10 @@ TargetTransformInfo::TargetTransformInfo(T Impl)
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
-class TargetIRAnalysis {
+class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
typedef TargetTransformInfo Result;
- /// \brief Opaque, unique identifier for this analysis pass.
- static void *ID() { return (void *)&PassID; }
-
- /// \brief Provide access to a name for this pass for debugging purposes.
- static StringRef name() { return "TargetIRAnalysis"; }
-
/// \brief Default construct a target IR analysis.
///
/// This will use the module's datalayout to construct a baseline
@@ -912,9 +1025,10 @@ public:
return *this;
}
- Result run(const Function &F);
+ Result run(const Function &F, AnalysisManager<Function> &);
private:
+ friend AnalysisInfoMixin<TargetIRAnalysis>;
static char PassID;
/// \brief The callback used to produce a result.
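With TargetIRAnalysis now derived from AnalysisInfoMixin and run() taking an AnalysisManager, the analysis participates in the new pass manager like any other function analysis. A minimal usage sketch, assuming the analysis is registered with a FunctionAnalysisManager:
// Sketch: register TargetIRAnalysis with the new pass manager and query the
// resulting TargetTransformInfo inside a pass.
FunctionAnalysisManager FAM;
FAM.registerPass([] { return TargetIRAnalysis(); });
// ... later, inside SomePass::run(Function &F, FunctionAnalysisManager &AM):
//   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
//   if (TTI.isFPVectorizationPotentiallyUnsafe())
//     /* be conservative about vectorizing FP math */;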