summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2021-07-29 20:15:26 +0000
committerDimitry Andric <dim@FreeBSD.org>2021-07-29 20:15:26 +0000
commit344a3780b2e33f6ca763666c380202b18aab72a3 (patch)
treef0b203ee6eb71d7fdd792373e3c81eb18d6934dd /llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
parentb60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp132
1 files changed, 78 insertions, 54 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index e7ac2391512f..03c4da8495ab 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -30,8 +30,8 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
-int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) {
+InstructionCost SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -63,10 +63,10 @@ int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
return 4 * TTI::TCC_Basic;
}
-int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind,
- Instruction *Inst) {
+InstructionCost SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ Instruction *Inst) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -183,9 +183,10 @@ int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
-int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty,
- TTI::TargetCostKind CostKind) {
+InstructionCost
+SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -246,7 +247,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// Find out if L contains a call, what the machine instruction count
// estimate is, and how many stores there are.
bool HasCall = false;
- unsigned NumStores = 0;
+ InstructionCost NumStores = 0;
for (auto &BB : L->blocks())
for (auto &I : *BB) {
if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
@@ -270,7 +271,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// The z13 processor will run out of store tags if too many stores
// are fed into it too quickly. Therefore make sure there are not
// too many stores in the resulting unrolled loop.
- unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
+ unsigned const NumStoresVal = *NumStores.getValue();
+ unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX);
if (HasCall) {
// Only allow full unrolling if loop has any calls.
@@ -323,12 +325,18 @@ unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
return 0;
}
-unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const {
- if (!Vector)
- return 64;
- if (ST->hasVector())
- return 128;
- return 0;
+TypeSize
+SystemZTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+ switch (K) {
+ case TargetTransformInfo::RGK_Scalar:
+ return TypeSize::getFixed(64);
+ case TargetTransformInfo::RGK_FixedWidthVector:
+ return TypeSize::getFixed(ST->hasVector() ? 128 : 0);
+ case TargetTransformInfo::RGK_ScalableVector:
+ return TypeSize::getScalable(0);
+ }
+
+ llvm_unreachable("Unsupported register kind");
}
unsigned SystemZTTIImpl::getMinPrefetchStride(unsigned NumMemAccesses,
@@ -372,10 +380,10 @@ static unsigned getNumVectorRegs(Type *Ty) {
return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
}
-int SystemZTTIImpl::getArithmeticInstrCost(
+InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
- TTI::OperandValueKind Op1Info,
- TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
@@ -487,8 +495,10 @@ int SystemZTTIImpl::getArithmeticInstrCost(
if (DivRemConstPow2)
return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
- if (DivRemConst)
- return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args);
+ if (DivRemConst) {
+ SmallVector<Type *> Tys(Args.size(), Ty);
+ return VF * DivMulSeqCost + getScalarizationOverhead(VTy, Args, Tys);
+ }
if ((SignedDivRem || UnsignedDivRem) && VF > 4)
// Temporary hack: disable high vectorization factors with integer
// division/remainder, which will get scalarized and handled with
@@ -509,9 +519,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(
return NumVectors;
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
- unsigned ScalarCost =
+ InstructionCost ScalarCost =
getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
- unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(VTy, Args);
+ SmallVector<Type *> Tys(Args.size(), Ty);
+ InstructionCost Cost =
+ (VF * ScalarCost) + getScalarizationOverhead(VTy, Args, Tys);
// FIXME: VF 2 for these FP operations are currently just as
// expensive as for VF 4.
if (VF == 2)
@@ -528,7 +540,9 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// There is no native support for FRem.
if (Opcode == Instruction::FRem) {
- unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args);
+ SmallVector<Type *> Tys(Args.size(), Ty);
+ InstructionCost Cost =
+ (VF * LIBCALL_COST) + getScalarizationOverhead(VTy, Args, Tys);
// FIXME: VF 2 for float is currently just as expensive as for VF 4.
if (VF == 2 && ScalarBits == 32)
Cost *= 2;
@@ -541,8 +555,11 @@ int SystemZTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
-int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
- int Index, VectorType *SubTp) {
+InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
+ VectorType *Tp,
+ ArrayRef<int> Mask, int Index,
+ VectorType *SubTp) {
+ Kind = improveShuffleKindFromMask(Kind, Mask);
if (ST->hasVector()) {
unsigned NumVectors = getNumVectorRegs(Tp);
@@ -575,7 +592,7 @@ int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
}
}
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
// Return the log2 difference of the element sizes of the two vector types.
@@ -700,13 +717,14 @@ getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
return Cost;
}
-int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
- TTI::CastContextHint CCH,
- TTI::TargetCostKind CostKind,
- const Instruction *I) {
+InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src,
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
// FIXME: Can the logic below also be used for these cost kinds?
if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
- int BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+ auto BaseCost = BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
return BaseCost == 0 ? BaseCost : 1;
}
@@ -743,8 +761,13 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
}
}
else if (ST->hasVector()) {
+ // Vector to scalar cast.
auto *SrcVecTy = cast<FixedVectorType>(Src);
- auto *DstVecTy = cast<FixedVectorType>(Dst);
+ auto *DstVecTy = dyn_cast<FixedVectorType>(Dst);
+ if (!DstVecTy) {
+ // TODO: tune vector-to-scalar cast.
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+ }
unsigned VF = SrcVecTy->getNumElements();
unsigned NumDstVectors = getNumVectorRegs(Dst);
unsigned NumSrcVectors = getNumVectorRegs(Src);
@@ -789,9 +812,9 @@ int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values. Base implementation does not
// realize float->int gets scalarized.
- unsigned ScalarCost = getCastInstrCost(
+ InstructionCost ScalarCost = getCastInstrCost(
Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind);
- unsigned TotCost = VF * ScalarCost;
+ InstructionCost TotCost = VF * ScalarCost;
bool NeedsInserts = true, NeedsExtracts = true;
// FP128 registers do not get inserted or extracted.
if (DstScalarBits == 128 &&
@@ -846,10 +869,11 @@ static unsigned getOperandsExtensionCost(const Instruction *I) {
return ExtCost;
}
-int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy, CmpInst::Predicate VecPred,
- TTI::TargetCostKind CostKind,
- const Instruction *I) {
+InstructionCost SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy,
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
if (CostKind != TTI::TCK_RecipThroughput)
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);
@@ -930,8 +954,8 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind);
}
-int SystemZTTIImpl::
-getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+InstructionCost SystemZTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
// vlvgp will insert two grs into a vector register, so only count half the
// number of instructions.
if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64))
@@ -1039,10 +1063,11 @@ static bool isBswapIntrinsicCall(const Value *V) {
return false;
}
-int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- MaybeAlign Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind,
- const Instruction *I) {
+InstructionCost SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ MaybeAlign Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
assert(!Src->isVoidTy() && "Invalid type");
// TODO: Handle other cost kinds.
@@ -1109,7 +1134,7 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// needed for using / defining the vector operands. The SystemZ version does
// roughly the same but bases the computations on vector permutations
// instead.
-int SystemZTTIImpl::getInterleavedMemoryOpCost(
+InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
@@ -1120,9 +1145,6 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(
assert(isa<VectorType>(VecTy) &&
"Expect a vector type for interleaved memory op");
- // Return the ceiling of dividing A by B.
- auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
-
unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
unsigned VF = NumElts / Factor;
@@ -1149,7 +1171,7 @@ int SystemZTTIImpl::getInterleavedMemoryOpCost(
// requires one operation, except that vperm can handle two input
// registers first time for each dst vector.
unsigned NumSrcVecs = ValueVecs[Index].count();
- unsigned NumDstVecs = ceil(VF * getScalarSizeInBits(VecTy), 128U);
+ unsigned NumDstVecs = divideCeil(VF * getScalarSizeInBits(VecTy), 128U);
assert (NumSrcVecs >= NumDstVecs && "Expected at least as many sources");
NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs);
}
@@ -1173,9 +1195,11 @@ static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy) {
return -1;
}
-int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
- TTI::TargetCostKind CostKind) {
- int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
+InstructionCost
+SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) {
+ InstructionCost Cost =
+ getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
if (Cost != -1)
return Cost;
return BaseT::getIntrinsicInstrCost(ICA, CostKind);