Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 94
1 file changed, 56 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 0b48f9f602b7..3364a9bcaccb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -61,6 +61,9 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
};
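
Loop peeling preferences were split out of getUnrollingPreferences into a dedicated TTI hook in this time frame; the hunk above declares the AMDGPU override. A minimal sketch of the out-of-line body, assuming (as is conventional) that the real definition sits in AMDGPUTargetTransformInfo.cpp and defers to the generic defaults:

void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::PeelingPreferences &PP) {
  // Start from the generic heuristics; a target may then adjust fields such
  // as PP.PeelCount or PP.AllowPeeling before returning.
  BaseT::getPeelingPreferences(L, SE, PP);
}
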
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
@@ -70,10 +73,11 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
friend BaseT;
const GCNSubtarget *ST;
- const AMDGPUTargetLowering *TLI;
+ const SITargetLowering *TLI;
AMDGPUTTIImpl CommonTTI;
bool IsGraphicsShader;
bool HasFP32Denormals;
+ unsigned MaxVGPRs;
const FeatureBitset InlineFeatureIgnoreList = {
// Codegen control options which don't matter.
@@ -133,13 +137,21 @@ public:
TLI(ST->getTargetLowering()),
CommonTTI(TM, F),
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())),
- HasFP32Denormals(ST->hasFP32Denormals(F)) { }
+ HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()),
+ MaxVGPRs(ST->getMaxNumVGPRs(
+ std::max(ST->getWavesPerEU(F).first,
+ ST->getWavesPerEUForWorkGroup(
+ ST->getFlatWorkGroupSizes(F).second)))) {}
bool hasBranchDivergence() { return true; }
+ bool useGPUDivergenceAnalysis() const;
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
return TTI::PSK_FastHardware;
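
Two things change in the constructor: HasFP32Denormals is now derived from the function's mode-register defaults (AMDGPU::SIModeRegisterDefaults) rather than from the subtarget alone, and the new MaxVGPRs member caches the VGPR budget implied by the function's occupancy attributes. (With TLI now typed as SITargetLowering, GCN-specific queries also no longer need a cast.) Here is the MaxVGPRs initializer unpacked into named steps, using only the calls that appear in the hunk above; computeMaxVGPRs itself is a hypothetical helper for readability, and the backend-internal subtarget header is assumed:

#include <algorithm>
// (GCNSubtarget and Function come from the AMDGPU backend / llvm/IR headers.)

static unsigned computeMaxVGPRs(const GCNSubtarget &ST, const Function &F) {
  // Occupancy floor requested via the "amdgpu-waves-per-eu" attribute.
  unsigned MinWavesPerEU = ST.getWavesPerEU(F).first;
  // Occupancy forced by the largest permitted flat workgroup size.
  unsigned WavesForWG =
      ST.getWavesPerEUForWorkGroup(ST.getFlatWorkGroupSizes(F).second);
  // Higher occupancy leaves fewer VGPRs per wave, so take the stricter bound.
  return ST.getMaxNumVGPRs(std::max(MinWavesPerEU, WavesForWG));
}
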
@@ -147,6 +159,7 @@ public:
unsigned getHardwareNumberOfRegisters(bool Vector) const;
unsigned getNumberOfRegisters(bool Vector) const;
+ unsigned getNumberOfRegisters(unsigned RCID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
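
The new getNumberOfRegisters(unsigned RCID) overload lets callers ask for the budget of a particular register class instead of the old scalar/vector split. A consumer-side usage sketch; registerBudget is a hypothetical helper, not part of this patch:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

static unsigned registerBudget(const TargetTransformInfo &TTI, Type *Ty) {
  // Map the type to the target's register class, then query its size.
  unsigned ClassID = TTI.getRegisterClassForType(/*Vector=*/true, Ty);
  return TTI.getNumberOfRegisters(ClassID);
}
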
@@ -157,22 +170,30 @@ public:
VectorType *VecTy) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
- bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
-
+ Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign) const;
+
+ void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
+ LLVMContext &Context,
+ unsigned RemainingBytes,
+ unsigned SrcAddrSpace,
+ unsigned DestAddrSpace,
+ unsigned SrcAlign,
+ unsigned DestAlign) const;
unsigned getMaxInterleaveFactor(unsigned VF);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
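
Three related API migrations land in this hunk: the raw unsigned alignment parameters become llvm::Align (a power-of-two, explicitly byte-unit type), the memcpy loop-lowering hooks let the target pick the widest profitable access type when expanding memcpy into a loop, and getArithmeticInstrCost gains a TTI::TargetCostKind parameter defaulting to reciprocal throughput. A call-site sketch for the Align change, with illustrative values only:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

static bool canVectorizeChain(const TargetTransformInfo &TTI) {
  // A 16-byte chain at 4-byte alignment in address space 1 (global memory on
  // AMDGPU). Align cannot hold zero, removing the 0-means-unknown ambiguity.
  return TTI.isLegalToVectorizeLoadChain(/*ChainSizeInBytes=*/16, Align(4),
                                         /*AddrSpace=*/1);
}
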
@@ -180,7 +201,10 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
- unsigned getCFInstrCost(unsigned Opcode);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
+
+ bool isInlineAsmSourceOfDivergence(const CallInst *CI,
+ ArrayRef<unsigned> Indices = {}) const;
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
bool isSourceOfDivergence(const Value *V) const;
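
getCFInstrCost picks up the same TTI::TargetCostKind parameter, and the new isInlineAsmSourceOfDivergence hook lets divergence analysis ask whether particular results of an inline asm call vary across lanes (an empty Indices list meaning the call as a whole). A sketch of a cost-kind-aware query; branchCostForSize is hypothetical:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static int branchCostForSize(const TargetTransformInfo &TTI) {
  // Ask for code-size cost rather than the default throughput cost; the
  // other kinds are TCK_RecipThroughput, TCK_Latency and TCK_SizeAndLatency.
  return TTI.getCFInstrCost(Instruction::Br,
                            TargetTransformInfo::TCK_CodeSize);
}
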
@@ -196,13 +220,13 @@ public:
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const;
- bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
- Value *OldV, Value *NewV) const;
+ Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+ Value *NewV) const;
unsigned getVectorSplitCost() { return 0; }
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
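
rewriteIntrinsicWithAddressSpace now returns the replacement Value* (nullptr on failure) instead of a bool, so a pass like InferAddressSpaces can distinguish "rewritten in place" from "rebuilt as a new value"; getShuffleCost is likewise tightened to take VectorType* rather than bare Type*. A simplified consumer-side sketch, assuming an IntrinsicInst *II and the old/new pointer values are in hand:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static bool tryRewriteIntrinsic(const TargetTransformInfo &TTI,
                                IntrinsicInst *II, Value *OldV, Value *NewV) {
  if (Value *Repl = TTI.rewriteIntrinsicWithAddressSpace(II, OldV, NewV)) {
    // The hook may mutate II in place and return it, or build a new value.
    if (Repl != II)
      II->replaceAllUsesWith(Repl);
    return true;
  }
  return false; // Rewrite refused; leave the intrinsic alone.
}
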
@@ -211,23 +235,17 @@ public:
int getInlinerVectorBonusPercent() { return 0; }
- int getArithmeticReductionCost(unsigned Opcode,
- Type *Ty,
- bool IsPairwise);
- template <typename T>
- int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<T *> Args, FastMathFlags FMF,
- unsigned VF);
- int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX);
- int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF,
- unsigned VF = 1);
- int getMinMaxReductionCost(Type *Ty, Type *CondTy,
- bool IsPairwiseForm,
- bool IsUnsigned);
- unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
+ int getArithmeticReductionCost(
+ unsigned Opcode,
+ VectorType *Ty,
+ bool IsPairwise,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
+
+ int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind);
+ int getMinMaxReductionCost(
+ VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
};
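
The largest cleanup here folds the old overload set for intrinsic costs (type-based, value-based, and a templated bridge) into a single IntrinsicCostAttributes descriptor plus a cost kind. A caller-side sketch; the constructor shape follows the headers of this era (an assumption), and fmaCost is a hypothetical helper:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

static int fmaCost(const TargetTransformInfo &TTI, Type *Ty) {
  // Describe the call once: intrinsic ID, result type, argument types.
  IntrinsicCostAttributes Attrs(Intrinsic::fma, Ty, {Ty, Ty, Ty});
  return TTI.getIntrinsicInstrCost(Attrs,
                                   TargetTransformInfo::TCK_RecipThroughput);
}
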
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
@@ -245,28 +263,28 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()),
ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()),
- CommonTTI(TM, F) {}
+ CommonTTI(TM, F) {}
const R600Subtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
unsigned getHardwareNumberOfRegisters(bool Vec) const;
unsigned getNumberOfRegisters(bool Vec) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getMinVectorRegisterBitWidth() const;
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
- bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment,
+ bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getCFInstrCost(unsigned Opcode);
+ unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
};
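
The R600 side mirrors the GCN changes: the same Align migration, the CostKind parameter on getCFInstrCost, and the new peeling hook. A plausible out-of-line body for the latter, assuming it forwards through the shared AMDGPUTTIImpl the same way getUnrollingPreferences does:

void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                        TTI::PeelingPreferences &PP) {
  // Both subtargets share the generic AMDGPU loop heuristics via CommonTTI.
  CommonTTI.getPeelingPreferences(L, SE, PP);
}
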