Vendor import of llvm-project main 88e66fa60ae5, the last commit before the upstream release/13.x branch was created - src

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2021-07-29 20:15:26 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2021-07-29 20:15:26 +0000
commit	344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
tree	f0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
parent	b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)

vendor/llvm-project/llvmorg-13-init-16847-g88e66fa60ae5 vendor/llvm-project/llvmorg-12.0.1-rc2-0-ge7dac564cd0e vendor/llvm-project/llvmorg-12.0.1-0-gfed41342a82f

Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp')

-rw-r--r--

llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

132

1 files changed, 78 insertions, 54 deletions

diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index e7ac2391512f..03c4da8495ab 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp

@@ -30,8 +30,8 @@ using namespace llvm;

//===----------------------------------------------------------------------===//

-int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,

- TTI::TargetCostKind CostKind) {

+InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,

+ TTI::TargetCostKind CostKind) {

assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();

@@ -63,10 +63,10 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,

return 4 * TTI::TCC_Basic;

}

-int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,

- const APInt &Imm, Type *Ty,

- TTI::TargetCostKind CostKind,

- Instruction *Inst) {

+InstructionCost SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,

+ const APInt &Imm, Type *Ty,

+ TTI::TargetCostKind CostKind,

+ Instruction *Inst) {

assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();

@@ -183,9 +183,10 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,

return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);

}

-int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,

- const APInt &Imm, Type *Ty,

- TTI::TargetCostKind CostKind) {

+InstructionCost

+SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,

+ const APInt &Imm, Type *Ty,

+ TTI::TargetCostKind CostKind) {

assert(Ty->isIntegerTy());

unsigned BitSize = Ty->getPrimitiveSizeInBits();

@@ -246,7 +247,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

// Find out if L contains a call, what the machine instruction count

// estimate is, and how many stores there are.

bool HasCall = false;

- unsigned NumStores = 0;

+ InstructionCost NumStores = 0;

for (auto &BB : L->blocks())

for (auto &I : *BB) {

if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {

@@ -270,7 +271,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

// The z13 processor will run out of store tags if too many stores

// are fed into it too quickly. Therefore make sure there are not

// too many stores in the resulting unrolled loop.

- unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);

+ unsigned const NumStoresVal = *NumStores.getValue();

+ unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX);

if (HasCall) {

// Only allow full unrolling if loop has any calls.

@@ -323,12 +325,18 @@ unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const {

return 0;

}

-unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {

- if (!Vector)

- return 64;

- if (ST->hasVector())

- return 128;

- return 0;

+TypeSize

+SystemZTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {

+ switch (K) {

+ case TargetTransformInfo::RGK_Scalar:

+ return TypeSize::getFixed(64);

+ case TargetTransformInfo::RGK_FixedWidthVector:

+ return TypeSize::getFixed(ST->hasVector() ? 128 : 0);

+ case TargetTransformInfo::RGK_ScalableVector:

+ return TypeSize::getScalable(0);

+ }

+ llvm_unreachable("Unsupported register kind");

}

unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,

@@ -372,10 +380,10 @@ static unsigned getNumVectorRegs(Type *Ty) {

return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));

}

-int SystemZTTIImpl::getArithmeticInstrCost(

+InstructionCost SystemZTTIImpl::getArithmeticInstrCost(

unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,

- TTI::OperandValueKind Op1Info,

- TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,

+ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,

+ TTI::OperandValueProperties Opd1PropInfo,

TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,

const Instruction *CxtI) {

@@ -487,8 +495,10 @@ int SystemZTTIImpl::getArithmeticInstrCost(

if (DivRemConstPow2)

return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));

- if (DivRemConst)

- return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args);

+ if (DivRemConst) {

+ SmallVector<Type *> Tys(Args.size(), Ty);

+ return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args, Tys);

+ }

if ((SignedDivRem || UnsignedDivRem) && VF > 4)

// Temporary hack: disable high vectorization factors with integer

// division/remainder, which will get scalarized and handled with

@@ -509,9 +519,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(

return NumVectors;

// Return the cost of multiple scalar invocation plus the cost of

// inserting and extracting the values.

- unsigned ScalarCost =

+ InstructionCost ScalarCost =

getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);

- unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(VTy, Args);

+ SmallVector<Type *> Tys(Args.size(), Ty);

+ InstructionCost Cost =

+ (VF * ScalarCost) + getScalarizationOverhead(VTy, Args, Tys);

// FIXME: VF 2 for these FP operations are currently just as

// expensive as for VF 4.

if (VF == 2)

@@ -528,7 +540,9 @@ int SystemZTTIImpl::getArithmeticInstrCost(

// There is no native support for FRem.

if (Opcode == Instruction::FRem) {

- unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args);

+ SmallVector<Type *> Tys(Args.size(), Ty);

+ InstructionCost Cost =

+ (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args, Tys);

// FIXME: VF 2 for float is currently just as expensive as for VF 4.

if (VF == 2 && ScalarBits == 32)

Cost *= 2;

@@ -541,8 +555,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(

Opd1PropInfo, Opd2PropInfo, Args, CxtI);

}

-int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,

- int Index, VectorType *SubTp) {

+InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,

+ VectorType *Tp,

+ ArrayRef<int> Mask, int Index,

+ VectorType *SubTp) {

+ Kind = improveShuffleKindFromMask(Kind, Mask);

if (ST->hasVector()) {

unsigned NumVectors = getNumVectorRegs(Tp);

@@ -575,7 +592,7 @@ int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,

}

- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);

+ return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);

}

// Return the log2 difference of the element sizes of the two vector types.

@@ -700,13 +717,14 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,

return Cost;

}

-int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

- TTI::CastContextHint CCH,

- TTI::TargetCostKind CostKind,

- const Instruction *I) {

+InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,

+ Type *Src,

+ TTI::CastContextHint CCH,

+ TTI::TargetCostKind CostKind,

+ const Instruction *I) {

// FIXME: Can the logic below also be used for these cost kinds?

if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {

- int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

+ auto BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

return BaseCost == 0 ? BaseCost : 1;

}

@@ -743,8 +761,13 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

}

else if (ST->hasVector()) {

+ // Vector to scalar cast.

auto *SrcVecTy = cast<FixedVectorType>(Src);

- auto *DstVecTy = cast<FixedVectorType>(Dst);

+ auto *DstVecTy = dyn_cast<FixedVectorType>(Dst);

+ if (!DstVecTy) {

+ // TODO: tune vector-to-scalar cast.

+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

+ }

unsigned VF = SrcVecTy->getNumElements();

unsigned NumDstVectors = getNumVectorRegs(Dst);

unsigned NumSrcVectors = getNumVectorRegs(Src);

@@ -789,9 +812,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

// Return the cost of multiple scalar invocation plus the cost of

// inserting and extracting the values. Base implementation does not

// realize float->int gets scalarized.

- unsigned ScalarCost = getCastInstrCost(

+ InstructionCost ScalarCost = getCastInstrCost(

Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind);

- unsigned TotCost = VF * ScalarCost;

+ InstructionCost TotCost = VF * ScalarCost;

bool NeedsInserts = true, NeedsExtracts = true;

// FP128 registers do not get inserted or extracted.

if (DstScalarBits == 128 &&

@@ -846,10 +869,11 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {

return ExtCost;

}

-int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

- Type *CondTy, CmpInst::Predicate VecPred,

- TTI::TargetCostKind CostKind,

- const Instruction *I) {

+InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

+ Type *CondTy,

+ CmpInst::Predicate VecPred,

+ TTI::TargetCostKind CostKind,

+ const Instruction *I) {

if (CostKind != TTI::TCK_RecipThroughput)

return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);

@@ -930,8 +954,8 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);

}

-int SystemZTTIImpl::

-getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {

+InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,

+ unsigned Index) {

// vlvgp will insert two grs into a vector register, so only count half the

// number of instructions.

if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64))

@@ -1039,10 +1063,11 @@ static bool isBswapIntrinsicCall(const Value *V) {

return false;

}

-int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,

- MaybeAlign Alignment, unsigned AddressSpace,

- TTI::TargetCostKind CostKind,

- const Instruction *I) {

+InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,

+ MaybeAlign Alignment,

+ unsigned AddressSpace,

+ TTI::TargetCostKind CostKind,

+ const Instruction *I) {

assert(!Src->isVoidTy() && "Invalid type");

// TODO: Handle other cost kinds.

@@ -1109,7 +1134,7 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,

// needed for using / defining the vector operands. The SystemZ version does

// roughly the same but bases the computations on vector permutations

// instead.

-int SystemZTTIImpl::getInterleavedMemoryOpCost(

+InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(

unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,

Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,

bool UseMaskForCond, bool UseMaskForGaps) {

@@ -1120,9 +1145,6 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(

assert(isa<VectorType>(VecTy) &&

"Expect a vector type for interleaved memory op");

- // Return the ceiling of dividing A by B.

- auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };

unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();

assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

unsigned VF = NumElts / Factor;

@@ -1149,7 +1171,7 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(

// requires one operation, except that vperm can handle two input

// registers first time for each dst vector.

unsigned NumSrcVecs = ValueVecs[Index].count();

- unsigned NumDstVecs = ceil(VF * getScalarSizeInBits(VecTy), 128U);

+ unsigned NumDstVecs = divideCeil(VF * getScalarSizeInBits(VecTy), 128U);

assert (NumSrcVecs >= NumDstVecs && "Expected at least as many sources");

NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs);

}

@@ -1173,9 +1195,11 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {

return -1;

}

-int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

- TTI::TargetCostKind CostKind) {

- int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());

+InstructionCost

+SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

+ TTI::TargetCostKind CostKind) {

+ InstructionCost Cost =

+ getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());

if (Cost != -1)

return Cost;

return BaseT::getIntrinsicInstrCost(ICA, CostKind);