diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-07-29 20:15:26 +0000 |
| commit | 344a3780b2e33f6ca763666c380202b18aab72a3 (patch) | |
| tree | f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | |
| parent | b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff) | |
vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5 vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f
Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 132 |
1 files changed, 78 insertions, 54 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index e7ac2391512f..03c4da8495ab 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -30,8 +30,8 @@ using namespace llvm; // //===----------------------------------------------------------------------===// -int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { +InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -63,10 +63,10 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, return 4 * TTI::TCC_Basic; } -int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind, - Instruction *Inst) { +InstructionCost SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind, + Instruction *Inst) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -183,9 +183,10 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind); } -int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { +InstructionCost +SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -246,7 +247,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, // Find out if L contains a call, what the machine instruction count // estimate is, and how many stores there are. 
bool HasCall = false; - unsigned NumStores = 0; + InstructionCost NumStores = 0; for (auto &BB : L->blocks()) for (auto &I : *BB) { if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) { @@ -270,7 +271,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, // The z13 processor will run out of store tags if too many stores // are fed into it too quickly. Therefore make sure there are not // too many stores in the resulting unrolled loop. - unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX); + unsigned const NumStoresVal = *NumStores.getValue(); + unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX); if (HasCall) { // Only allow full unrolling if loop has any calls. @@ -323,12 +325,18 @@ unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const { return 0; } -unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const { - if (!Vector) - return 64; - if (ST->hasVector()) - return 128; - return 0; +TypeSize +SystemZTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { + switch (K) { + case TargetTransformInfo::RGK_Scalar: + return TypeSize::getFixed(64); + case TargetTransformInfo::RGK_FixedWidthVector: + return TypeSize::getFixed(ST->hasVector() ? 128 : 0); + case TargetTransformInfo::RGK_ScalableVector: + return TypeSize::getScalable(0); + } + + llvm_unreachable("Unsupported register kind"); } unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses, @@ -372,10 +380,10 @@ static unsigned getNumVectorRegs(Type *Ty) { return ((WideBits % 128U) ? 
((WideBits / 128U) + 1) : (WideBits / 128U)); } -int SystemZTTIImpl::getArithmeticInstrCost( +InstructionCost SystemZTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, - TTI::OperandValueKind Op1Info, - TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, + TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, const Instruction *CxtI) { @@ -487,8 +495,10 @@ int SystemZTTIImpl::getArithmeticInstrCost( if (DivRemConstPow2) return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1)); - if (DivRemConst) - return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args); + if (DivRemConst) { + SmallVector<Type *> Tys(Args.size(), Ty); + return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args, Tys); + } if ((SignedDivRem || UnsignedDivRem) && VF > 4) // Temporary hack: disable high vectorization factors with integer // division/remainder, which will get scalarized and handled with @@ -509,9 +519,11 @@ int SystemZTTIImpl::getArithmeticInstrCost( return NumVectors; // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. - unsigned ScalarCost = + InstructionCost ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind); - unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(VTy, Args); + SmallVector<Type *> Tys(Args.size(), Ty); + InstructionCost Cost = + (VF * ScalarCost) + getScalarizationOverhead(VTy, Args, Tys); // FIXME: VF 2 for these FP operations are currently just as // expensive as for VF 4. if (VF == 2) @@ -528,7 +540,9 @@ int SystemZTTIImpl::getArithmeticInstrCost( // There is no native support for FRem. 
if (Opcode == Instruction::FRem) { - unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args); + SmallVector<Type *> Tys(Args.size(), Ty); + InstructionCost Cost = + (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args, Tys); // FIXME: VF 2 for float is currently just as expensive as for VF 4. if (VF == 2 && ScalarBits == 32) Cost *= 2; @@ -541,8 +555,11 @@ int SystemZTTIImpl::getArithmeticInstrCost( Opd1PropInfo, Opd2PropInfo, Args, CxtI); } -int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, - int Index, VectorType *SubTp) { +InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, + VectorType *Tp, + ArrayRef<int> Mask, int Index, + VectorType *SubTp) { + Kind = improveShuffleKindFromMask(Kind, Mask); if (ST->hasVector()) { unsigned NumVectors = getNumVectorRegs(Tp); @@ -575,7 +592,7 @@ int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, } } - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp); } // Return the log2 difference of the element sizes of the two vector types. @@ -700,13 +717,14 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, return Cost; } -int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::CastContextHint CCH, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src, + TTI::CastContextHint CCH, + TTI::TargetCostKind CostKind, + const Instruction *I) { // FIXME: Can the logic below also be used for these cost kinds? if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) { - int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); + auto BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); return BaseCost == 0 ? 
BaseCost : 1; } @@ -743,8 +761,13 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, } } else if (ST->hasVector()) { + // Vector to scalar cast. auto *SrcVecTy = cast<FixedVectorType>(Src); - auto *DstVecTy = cast<FixedVectorType>(Dst); + auto *DstVecTy = dyn_cast<FixedVectorType>(Dst); + if (!DstVecTy) { + // TODO: tune vector-to-scalar cast. + return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); + } unsigned VF = SrcVecTy->getNumElements(); unsigned NumDstVectors = getNumVectorRegs(Dst); unsigned NumSrcVectors = getNumVectorRegs(Src); @@ -789,9 +812,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. Base implementation does not // realize float->int gets scalarized. - unsigned ScalarCost = getCastInstrCost( + InstructionCost ScalarCost = getCastInstrCost( Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind); - unsigned TotCost = VF * ScalarCost; + InstructionCost TotCost = VF * ScalarCost; bool NeedsInserts = true, NeedsExtracts = true; // FP128 registers do not get inserted or extracted. 
if (DstScalarBits == 128 && @@ -846,10 +869,11 @@ static unsigned getOperandsExtensionCost(const Instruction *I) { return ExtCost; } -int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, CmpInst::Predicate VecPred, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const Instruction *I) { if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); @@ -930,8 +954,8 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind); } -int SystemZTTIImpl:: -getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { +InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { // vlvgp will insert two grs into a vector register, so only count half the // number of instructions. if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64)) @@ -1039,10 +1063,11 @@ static bool isBswapIntrinsicCall(const Value *V) { return false; } -int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - MaybeAlign Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind, - const Instruction *I) { +InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + MaybeAlign Alignment, + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I) { assert(!Src->isVoidTy() && "Invalid type"); // TODO: Handle other cost kinds. @@ -1109,7 +1134,7 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, // needed for using / defining the vector operands. The SystemZ version does // roughly the same but bases the computations on vector permutations // instead. 
-int SystemZTTIImpl::getInterleavedMemoryOpCost( +InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { @@ -1120,9 +1145,6 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost( assert(isa<VectorType>(VecTy) && "Expect a vector type for interleaved memory op"); - // Return the ceiling of dividing A by B. - auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; - unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements(); assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); unsigned VF = NumElts / Factor; @@ -1149,7 +1171,7 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost( // requires one operation, except that vperm can handle two input // registers first time for each dst vector. unsigned NumSrcVecs = ValueVecs[Index].count(); - unsigned NumDstVecs = ceil(VF * getScalarSizeInBits(VecTy), 128U); + unsigned NumDstVecs = divideCeil(VF * getScalarSizeInBits(VecTy), 128U); assert (NumSrcVecs >= NumDstVecs && "Expected at least as many sources"); NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs); } @@ -1173,9 +1195,11 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) { return -1; } -int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) { - int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType()); +InstructionCost +SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, + TTI::TargetCostKind CostKind) { + InstructionCost Cost = + getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType()); if (Cost != -1) return Cost; return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
