Diffstat (limited to 'include/llvm/CodeGen/BasicTTIImpl.h')
-rw-r--r--  include/llvm/CodeGen/BasicTTIImpl.h  250 ++++++++++++++++++------
1 file changed, 202 insertions(+), 48 deletions(-)
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index f105d887c397..70bf670fdf0b 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1,9 +1,8 @@
//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -197,11 +196,12 @@ protected:
public:
/// \name Scalar TTI Implementations
/// @{
- bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
- unsigned BitWidth, unsigned AddressSpace,
- unsigned Alignment, bool *Fast) const {
+ bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
+ unsigned AddressSpace, unsigned Alignment,
+ bool *Fast) const {
EVT E = EVT::getIntegerVT(Context, BitWidth);
- return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
+ return getTLI()->allowsMisalignedMemoryAccesses(
+ E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
}
bool hasBranchDivergence() { return false; }
@@ -293,12 +293,12 @@ public:
}
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) {
- return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
+ ArrayRef<const Value *> Arguments, const User *U) {
+ return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
}
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) {
+ ArrayRef<Type *> ParamTys, const User *U) {
if (IID == Intrinsic::cttz) {
if (getTLI()->isCheapToSpeculateCttz())
return TargetTransformInfo::TCC_Basic;
@@ -311,7 +311,7 @@ public:
return TargetTransformInfo::TCC_Expensive;
}
- return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
+ return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@@ -414,6 +414,12 @@ public:
if (TLI->isZExtFree(OpTy, Ty))
return TargetTransformInfo::TCC_Free;
return TargetTransformInfo::TCC_Basic;
+
+ case Instruction::AddrSpaceCast:
+ if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
+ Ty->getPointerAddressSpace()))
+ return TargetTransformInfo::TCC_Free;
+ return TargetTransformInfo::TCC_Basic;
}
return BaseT::getOperationCost(Opcode, Ty, OpTy);
@@ -421,6 +427,8 @@ public:
unsigned getInliningThresholdMultiplier() { return 1; }
+ int getInlinerVectorBonusPercent() { return 150; }
+
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// This unrolling functionality is target independent, but to provide some
@@ -486,6 +494,13 @@ public:
UP.BEInsns = 2;
}
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) {
+ return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+ }
+
int getInstructionLatency(const Instruction *I) {
if (isa<LoadInst>(I))
return getST()->getSchedModel().DefaultLoadLatency;
@@ -657,7 +672,7 @@ public:
return 0;
if (Opcode == Instruction::AddrSpaceCast &&
- TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
+ TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
Dst->getPointerAddressSpace()))
return 0;
@@ -997,7 +1012,7 @@ public:
// inside the loop.
if (UseMaskForGaps)
Cost += static_cast<T *>(this)->getArithmeticInstrCost(
- BinaryOperator::And, MaskVT);
+ BinaryOperator::And, MaskVT);
return Cost;
}
@@ -1058,8 +1073,8 @@ public:
case Intrinsic::experimental_vector_reduce_and:
case Intrinsic::experimental_vector_reduce_or:
case Intrinsic::experimental_vector_reduce_xor:
- case Intrinsic::experimental_vector_reduce_fadd:
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
case Intrinsic::experimental_vector_reduce_smax:
case Intrinsic::experimental_vector_reduce_smin:
case Intrinsic::experimental_vector_reduce_fmax:
@@ -1116,6 +1131,9 @@ public:
unsigned getIntrinsicInstrCost(
Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
+ unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
+ auto *ConcreteTTI = static_cast<T *>(this);
+
SmallVector<unsigned, 2> ISDs;
unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
switch (IID) {
@@ -1144,8 +1162,8 @@ public:
if (ScalarCalls == 1)
return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
- unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
- IID, ScalarRetTy, ScalarTys, FMF);
+ unsigned ScalarCost =
+ ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
return ScalarCalls * ScalarCost + ScalarizationCost;
}
@@ -1227,44 +1245,181 @@ public:
case Intrinsic::sideeffect:
return 0;
case Intrinsic::masked_store:
- return static_cast<T *>(this)
- ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
+ return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
+ 0);
case Intrinsic::masked_load:
- return static_cast<T *>(this)
- ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
+ return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
case Intrinsic::experimental_vector_reduce_add:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::Add, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
+ /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_mul:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
+ /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_and:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::And, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
+ /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_or:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::Or, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
+ /*IsPairwiseForm=*/false);
case Intrinsic::experimental_vector_reduce_xor:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false);
- case Intrinsic::experimental_vector_reduce_fadd:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false);
- case Intrinsic::experimental_vector_reduce_fmul:
- return static_cast<T *>(this)->getArithmeticReductionCost(
- Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false);
+ return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
+ /*IsPairwiseForm=*/false);
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ return ConcreteTTI->getArithmeticReductionCost(
+ Instruction::FAdd, Tys[0],
+ /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
+ // reductions.
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ return ConcreteTTI->getArithmeticReductionCost(
+ Instruction::FMul, Tys[0],
+ /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
+ // reductions.
case Intrinsic::experimental_vector_reduce_smax:
case Intrinsic::experimental_vector_reduce_smin:
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin:
- return static_cast<T *>(this)->getMinMaxReductionCost(
+ return ConcreteTTI->getMinMaxReductionCost(
Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
- /*IsSigned=*/true);
+ /*IsUnsigned=*/false);
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
- return static_cast<T *>(this)->getMinMaxReductionCost(
+ return ConcreteTTI->getMinMaxReductionCost(
Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
- /*IsSigned=*/false);
+ /*IsUnsigned=*/true);
+ case Intrinsic::sadd_sat:
+ case Intrinsic::ssub_sat: {
+ Type *CondTy = Type::getInt1Ty(RetTy->getContext());
+ if (RetVF > 1)
+ CondTy = VectorType::get(CondTy, RetVF);
+
+ Type *OpTy = StructType::create({RetTy, CondTy});
+ Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
+ ? Intrinsic::sadd_with_overflow
+ : Intrinsic::ssub_with_overflow;
+
+ // SatMax -> Overflow && SumDiff < 0
+ // SatMin -> Overflow && SumDiff >= 0
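+ // For intuition, a sketch of an expansion consistent with the costs
+ // below (an illustration, not the mandated lowering):
+ //   {SumDiff, Ovf} = [s]add/[s]sub.with.overflow(LHS, RHS)
+ //   Sat = select(SumDiff < 0, SIGNED_MAX, SIGNED_MIN) ; icmp + select
+ //   Res = select(Ovf, Sat, SumDiff)                   ; second select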
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getIntrinsicInstrCost(
+ OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
+ CondTy, nullptr);
+ Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+ CondTy, nullptr);
+ return Cost;
+ }
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat: {
+ Type *CondTy = Type::getInt1Ty(RetTy->getContext());
+ if (RetVF > 1)
+ CondTy = VectorType::get(CondTy, RetVF);
+
+ Type *OpTy = StructType::create({RetTy, CondTy});
+ Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
+ ? Intrinsic::uadd_with_overflow
+ : Intrinsic::usub_with_overflow;
+
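+ // Sketch of the assumed expansion (illustrative only): unsigned
+ // saturation needs no sign test, e.g. for uadd.sat
+ //   {Sum, Ovf} = uadd.with.overflow(LHS, RHS)
+ //   Res = select(Ovf, UNSIGNED_MAX, Sum)
+ // i.e. the overflow intrinsic plus the single select costed below.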
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getIntrinsicInstrCost(
+ OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+ CondTy, nullptr);
+ return Cost;
+ }
+ case Intrinsic::smul_fix:
+ case Intrinsic::umul_fix: {
+ unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
+ Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
+ if (RetVF > 1)
+ ExtTy = VectorType::get(ExtTy, RetVF);
+
+ unsigned ExtOp =
+ IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+
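+ // The costs below assume a widening expansion (a sketch; targets may
+ // lower this more cheaply): extend both operands to 2*N bits, do one
+ // wide multiply, then recover the scaled result with two truncs, an
+ // lshr/shl pair, and an or to recombine the halves.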
+ unsigned Cost = 0;
+ Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+ Cost +=
+ 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
+ TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
+ TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
+ return Cost;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow: {
+ Type *SumTy = RetTy->getContainedType(0);
+ Type *OverflowTy = RetTy->getContainedType(1);
+ unsigned Opcode = IID == Intrinsic::sadd_with_overflow
+ ? BinaryOperator::Add
+ : BinaryOperator::Sub;
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
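+ //
+ // Mapped onto the costs below: one add/sub, three icmps for
+ // LHSSign/RHSSign/SumSign, two icmps comparing those sign bits, and
+ // a final 'and' (a rough model; many targets use flags instead).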
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+ Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
+ OverflowTy, nullptr);
+ Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
+ BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
+ Cost +=
+ ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
+ return Cost;
+ }
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::usub_with_overflow: {
+ Type *SumTy = RetTy->getContainedType(0);
+ Type *OverflowTy = RetTy->getContainedType(1);
+ unsigned Opcode = IID == Intrinsic::uadd_with_overflow
+ ? BinaryOperator::Add
+ : BinaryOperator::Sub;
+
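+ // Unsigned overflow needs only one compare against an operand, e.g.
+ //   Ovf = Sum ult LHS ; for uadd.with.overflow
+ // (or LHS ult RHS for usub), so the model is one add/sub plus one
+ // icmp.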
+ unsigned Cost = 0;
+ Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
+ OverflowTy, nullptr);
+ return Cost;
+ }
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ Type *MulTy = RetTy->getContainedType(0);
+ Type *OverflowTy = RetTy->getContainedType(1);
+ unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
+ Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
+ if (MulTy->isVectorTy())
+ ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements());
+
+ unsigned ExtOp =
+ IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
+
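+ // A sketch of the modeled expansion (one common strategy, not the
+ // only one): extend to 2*N bits, multiply once, truncate the low and
+ // the shifted-high halves, and compare them with an icmp (plus an
+ // ashr in the signed case to rebuild the expected sign bits).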
+ unsigned Cost = 0;
+ Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+ Cost +=
+ 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
+ Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
+ TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+
+ if (IID == Intrinsic::smul_with_overflow)
+ Cost += ConcreteTTI->getArithmeticInstrCost(
+ Instruction::AShr, MulTy, TTI::OK_AnyValue,
+ TTI::OK_UniformConstantValue);
+
+ Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
+ OverflowTy, nullptr);
+ return Cost;
+ }
case Intrinsic::ctpop:
ISDs.push_back(ISD::CTPOP);
// In case of legalization use TCC_Expensive. This is cheaper than a
@@ -1305,17 +1460,16 @@ public:
if (MinLegalCostI != LegalCost.end())
return *MinLegalCostI;
- auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
+ auto MinCustomCostI =
+ std::min_element(CustomCost.begin(), CustomCost.end());
if (MinCustomCostI != CustomCost.end())
return *MinCustomCostI;
// If we can't lower fmuladd into an FMA estimate the cost as a floating
// point mul followed by an add.
if (IID == Intrinsic::fmuladd)
- return static_cast<T *>(this)
- ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
- static_cast<T *>(this)
- ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
+ return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
+ ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
@@ -1333,7 +1487,7 @@ public:
Ty = Ty->getScalarType();
ScalarTys.push_back(Ty);
}
- unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
+ unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
IID, RetTy->getScalarType(), ScalarTys, FMF);
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
if (Tys[i]->isVectorTy()) {