diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 94 |
1 files changed, 56 insertions, 38 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 0b48f9f602b7..3364a9bcaccb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -61,6 +61,9 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); }; class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> { @@ -70,10 +73,11 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> { friend BaseT; const GCNSubtarget *ST; - const AMDGPUTargetLowering *TLI; + const SITargetLowering *TLI; AMDGPUTTIImpl CommonTTI; bool IsGraphicsShader; bool HasFP32Denormals; + unsigned MaxVGPRs; const FeatureBitset InlineFeatureIgnoreList = { // Codegen control options which don't matter. @@ -133,13 +137,21 @@ public: TLI(ST->getTargetLowering()), CommonTTI(TM, F), IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())), - HasFP32Denormals(ST->hasFP32Denormals(F)) { } + HasFP32Denormals(AMDGPU::SIModeRegisterDefaults(F).allFP32Denormals()), + MaxVGPRs(ST->getMaxNumVGPRs( + std::max(ST->getWavesPerEU(F).first, + ST->getWavesPerEUForWorkGroup( + ST->getFlatWorkGroupSizes(F).second)))) {} bool hasBranchDivergence() { return true; } + bool useGPUDivergenceAnalysis() const; void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); return TTI::PSK_FastHardware; @@ -147,6 +159,7 @@ public: unsigned getHardwareNumberOfRegisters(bool Vector) const; unsigned getNumberOfRegisters(bool Vector) const; + unsigned getNumberOfRegisters(unsigned RCID) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getMinVectorRegisterBitWidth() const; unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, @@ -157,22 +170,30 @@ public: VectorType *VecTy) const; unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; - bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, - unsigned Alignment, + bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; - bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, - unsigned Alignment, + bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; - bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, - unsigned Alignment, + bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; - + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAddrSpace, unsigned DestAddrSpace, + unsigned SrcAlign, unsigned DestAlign) const; + + void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAddrSpace, + unsigned DestAddrSpace, + unsigned SrcAlign, + unsigned DestAlign) const; unsigned getMaxInterleaveFactor(unsigned VF); bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -180,7 +201,10 @@ public: ArrayRef<const Value *> Args = ArrayRef<const Value *>(), const Instruction *CxtI = nullptr); - unsigned getCFInstrCost(unsigned Opcode); + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind); + + bool isInlineAsmSourceOfDivergence(const CallInst *CI, + ArrayRef<unsigned> Indices = {}) const; int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); bool isSourceOfDivergence(const Value *V) const; @@ -196,13 +220,13 @@ public: bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const; - bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, - Value *OldV, Value *NewV) const; + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, + Value *NewV) const; unsigned getVectorSplitCost() { return 0; } - unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp); + unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, + VectorType *SubTp); bool areInlineCompatible(const Function *Caller, const Function *Callee) const; @@ -211,23 +235,17 @@ public: int getInlinerVectorBonusPercent() { return 0; } - int getArithmeticReductionCost(unsigned Opcode, - Type *Ty, - bool IsPairwise); - template <typename T> - int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<T *> Args, FastMathFlags FMF, - unsigned VF); - int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Type *> Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX); - int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef<Value *> Args, FastMathFlags FMF, - unsigned VF = 1); - int getMinMaxReductionCost(Type *Ty, Type *CondTy, - bool IsPairwiseForm, - bool IsUnsigned); - unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands); + int getArithmeticReductionCost( + unsigned Opcode, + VectorType *Ty, + bool IsPairwise, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); + + int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind); + int getMinMaxReductionCost( + VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); }; class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> { @@ -245,28 +263,28 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))), TLI(ST->getTargetLowering()), - CommonTTI(TM, F) {} + CommonTTI(TM, F) {} const R600Subtarget *getST() const { return ST; } const AMDGPUTargetLowering *getTLI() const { return TLI; } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); unsigned getHardwareNumberOfRegisters(bool Vec) const; unsigned getNumberOfRegisters(bool Vec) const; unsigned getRegisterBitWidth(bool Vector) const; unsigned getMinVectorRegisterBitWidth() const; unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; - bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, unsigned Alignment, + bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; - bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, - unsigned Alignment, + bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; - bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, - unsigned Alignment, + bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const; unsigned getMaxInterleaveFactor(unsigned VF); - unsigned getCFInstrCost(unsigned Opcode); + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind); int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); }; |