Diffstat (limited to 'include/llvm/CodeGen/BasicTTIImpl.h')
-rw-r--r--  include/llvm/CodeGen/BasicTTIImpl.h  |  199
1 file changed, 153 insertions(+), 46 deletions(-)
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index 6331070247928..bb5e7f9e8e30f 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -6,25 +6,63 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
-///
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <utility>
namespace llvm {
+class Function;
+class GlobalValue;
+class LLVMContext;
+class ScalarEvolution;
+class SCEV;
+class TargetMachine;
+
extern cl::opt<unsigned> PartialUnrollingThreshold;
/// \brief Base class which can be used to help build a TTI implementation.
@@ -39,8 +77,8 @@ extern cl::opt<unsigned> PartialUnrollingThreshold;
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
- typedef TargetTransformInfoImplCRTPBase<T> BaseT;
- typedef TargetTransformInfo TTI;
+ using BaseT = TargetTransformInfoImplCRTPBase<T>;
+ using TTI = TargetTransformInfo;
/// Estimate a cost of shuffle as a sequence of extract and insert
/// operations.
@@ -110,13 +148,13 @@ public:
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace) {
+ unsigned AddrSpace, Instruction *I = nullptr) {
TargetLoweringBase::AddrMode AM;
AM.BaseGV = BaseGV;
AM.BaseOffs = BaseOffset;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Scale;
- return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
+ return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
}
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
@@ -133,10 +171,6 @@ public:
return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
}
- bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) {
- return getTLI()->isFoldableMemAccessOffset(I, Offset);
- }
-
bool isTruncateFree(Type *Ty1, Type *Ty2) {
return getTLI()->isTruncateFree(Ty1, Ty2);
}
@@ -235,7 +269,8 @@ public:
if (N < 2 || N < TLI->getMinimumJumpTableEntries())
return N;
uint64_t Range =
- (MaxCaseVal - MinCaseVal).getLimitedValue(UINT64_MAX - 1) + 1;
+ (MaxCaseVal - MinCaseVal)
+ .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
// Check whether a range of clusters is dense enough for a jump table
if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
JumpTableSize = Range;
@@ -262,6 +297,10 @@ public:
TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
}
+ bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
+ return true;
+ }
+
unsigned getFPOpCost(Type *Ty) {
// By default, FP instructions are no more expensive since they are
// implemented in HW. Target specific TTI can override this.
@@ -272,17 +311,15 @@ public:
const TargetLoweringBase *TLI = getTLI();
switch (Opcode) {
default: break;
- case Instruction::Trunc: {
+ case Instruction::Trunc:
if (TLI->isTruncateFree(OpTy, Ty))
return TargetTransformInfo::TCC_Free;
return TargetTransformInfo::TCC_Basic;
- }
- case Instruction::ZExt: {
+ case Instruction::ZExt:
if (TLI->isZExtFree(OpTy, Ty))
return TargetTransformInfo::TCC_Free;
return TargetTransformInfo::TCC_Basic;
}
- }
return BaseT::getOperationCost(Opcode, Ty, OpTy);
}
@@ -354,6 +391,13 @@ public:
UP.BEInsns = 2;
}
+ int getInstructionLatency(const Instruction *I) {
+ if (isa<LoadInst>(I))
+ return getST()->getSchedModel().DefaultLoadLatency;
+
+ return BaseT::getInstructionLatency(I);
+ }
+
/// @}
/// \name Vector TTI Implementations
@@ -394,8 +438,8 @@ public:
if (A->getType()->isVectorTy()) {
VecTy = A->getType();
// If A is a vector operand, VF should be 1 or correspond to A.
- assert ((VF == 1 || VF == VecTy->getVectorNumElements()) &&
- "Vector argument does not match VF");
+ assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
+ "Vector argument does not match VF");
}
else
VecTy = VectorType::get(A->getType(), VF);
@@ -408,8 +452,8 @@ public:
}
unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
- assert (VecTy->isVectorTy());
-
+ assert(VecTy->isVectorTy());
+
unsigned Cost = 0;
Cost += getScalarizationOverhead(VecTy, true, false);
@@ -531,7 +575,6 @@ public:
// Handle scalar conversions.
if (!Src->isVectorTy() && !Dst->isVectorTy()) {
-
// Scalar bitcasts are usually free.
if (Opcode == Instruction::BitCast)
return 0;
@@ -547,7 +590,6 @@ public:
// Check vector-to-vector casts.
if (Dst->isVectorTy() && Src->isVectorTy()) {
-
// If the cast is between same-sized registers, then the check is simple.
if (SrcLT.first == DstLT.first &&
SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
@@ -743,7 +785,6 @@ public:
// We only scale the cost of loads since interleaved store groups aren't
// allowed to have gaps.
if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
-
// The number of loads of a legal type it will take to represent a load
// of the unlegalized vector type.
unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
@@ -821,7 +862,7 @@ public:
ArrayRef<Value *> Args, FastMathFlags FMF,
unsigned VF = 1) {
unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
- assert ((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
+ assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
switch (IID) {
default: {
@@ -829,7 +870,7 @@ public:
SmallVector<Type *, 4> Types;
for (Value *Op : Args) {
Type *OpTy = Op->getType();
- assert (VF == 1 || !OpTy->isVectorTy());
+ assert(VF == 1 || !OpTy->isVectorTy());
Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
}
@@ -839,7 +880,7 @@ public:
// Compute the scalarization overhead based on Args for a vector
// intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
// CostModel will pass a vector RetTy and VF is 1.
- unsigned ScalarizationCost = UINT_MAX;
+ unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
if (RetVF > 1 || VF > 1) {
ScalarizationCost = 0;
if (!RetTy->isVoidTy())
@@ -851,7 +892,7 @@ public:
getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost);
}
case Intrinsic::masked_scatter: {
- assert (VF == 1 && "Can't vectorize types here.");
+ assert(VF == 1 && "Can't vectorize types here.");
Value *Mask = Args[3];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
@@ -862,7 +903,7 @@ public:
Alignment);
}
case Intrinsic::masked_gather: {
- assert (VF == 1 && "Can't vectorize types here.");
+ assert(VF == 1 && "Can't vectorize types here.");
Value *Mask = Args[2];
bool VarMask = !isa<Constant>(Mask);
unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
@@ -873,13 +914,14 @@ public:
}
}
}
-
+
/// Get intrinsic cost based on argument types.
- /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
- /// arguments and the return value will be computed based on types.
- unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX) {
+ /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
+ /// cost of scalarizing the arguments and the return value will be computed
+ /// based on types.
+ unsigned getIntrinsicInstrCost(
+ Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
SmallVector<unsigned, 2> ISDs;
unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
switch (IID) {
@@ -889,7 +931,7 @@ public:
unsigned ScalarCalls = 1;
Type *ScalarRetTy = RetTy;
if (RetTy->isVectorTy()) {
- if (ScalarizationCostPassed == UINT_MAX)
+ if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
ScalarRetTy = RetTy->getScalarType();
@@ -898,7 +940,7 @@ public:
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
Type *Ty = Tys[i];
if (Ty->isVectorTy()) {
- if (ScalarizationCostPassed == UINT_MAX)
+ if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
ScalarizationCost += getScalarizationOverhead(Ty, false, true);
ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
Ty = Ty->getScalarType();
@@ -985,6 +1027,7 @@ public:
// FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
+ case Intrinsic::sideeffect:
return 0;
case Intrinsic::masked_store:
return static_cast<T *>(this)
@@ -1047,8 +1090,10 @@ public:
// this will emit a costly libcall, adding call overhead and spills. Make it
// very expensive.
if (RetTy->isVectorTy()) {
- unsigned ScalarizationCost = ((ScalarizationCostPassed != UINT_MAX) ?
- ScalarizationCostPassed : getScalarizationOverhead(RetTy, true, false));
+ unsigned ScalarizationCost =
+ ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
+ ? ScalarizationCostPassed
+ : getScalarizationOverhead(RetTy, true, false));
unsigned ScalarCalls = RetTy->getVectorNumElements();
SmallVector<Type *, 4> ScalarTys;
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
@@ -1061,7 +1106,7 @@ public:
IID, RetTy->getScalarType(), ScalarTys, FMF);
for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
if (Tys[i]->isVectorTy()) {
- if (ScalarizationCostPassed == UINT_MAX)
+ if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
}
@@ -1096,7 +1141,7 @@ public:
unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
const SCEV *) {
- return 0;
+ return 0;
}
/// Try to calculate arithmetic and shuffle op costs for reduction operations.
@@ -1134,7 +1179,8 @@ public:
///
/// The cost model should take into account that the actual length of the
/// vector is reduced on each iteration.
- unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
+ unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwise) {
assert(Ty->isVectorTy() && "Expect a vector type");
Type *ScalarTy = Ty->getVectorElementType();
unsigned NumVecElts = Ty->getVectorNumElements();
@@ -1159,7 +1205,7 @@ public:
}
// The minimal length of the vector is limited by the real length of vector
// operations performed on the current platform. That's why several final
- // reduction opertions are perfomed on the vectors with the same
+ // reduction operations are performed on the vectors with the same
// architecture-dependent length.
ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
@@ -1169,6 +1215,66 @@ public:
return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
}
+ /// Try to calculate op costs for min/max reduction operations.
+ /// \param CondTy Conditional type for the Select instruction.
+ unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
+ bool) {
+ assert(Ty->isVectorTy() && "Expect a vector type");
+ Type *ScalarTy = Ty->getVectorElementType();
+ Type *ScalarCondTy = CondTy->getVectorElementType();
+ unsigned NumVecElts = Ty->getVectorNumElements();
+ unsigned NumReduxLevels = Log2_32(NumVecElts);
+ unsigned CmpOpcode;
+ if (Ty->isFPOrFPVectorTy()) {
+ CmpOpcode = Instruction::FCmp;
+ } else {
+ assert(Ty->isIntOrIntVectorTy() &&
+ "expecting floating point or integer type for min/max reduction");
+ CmpOpcode = Instruction::ICmp;
+ }
+ unsigned MinMaxCost = 0;
+ unsigned ShuffleCost = 0;
+ auto *ConcreteTTI = static_cast<T *>(this);
+ std::pair<unsigned, MVT> LT =
+ ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
+ unsigned LongVectorCount = 0;
+ unsigned MVTLen =
+ LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
+ while (NumVecElts > MVTLen) {
+ NumVecElts /= 2;
+ // Assume the pairwise shuffles add a cost.
+ ShuffleCost += (IsPairwise + 1) *
+ ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
+ NumVecElts, Ty);
+ MinMaxCost +=
+ ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
+ ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
+ nullptr);
+ Ty = VectorType::get(ScalarTy, NumVecElts);
+ CondTy = VectorType::get(ScalarCondTy, NumVecElts);
+ ++LongVectorCount;
+ }
+ // The minimal length of the vector is limited by the real length of vector
+ // operations performed on the current platform. That's why several final
+ // reduction operations are performed on the vectors with the same
+ // architecture-dependent length.
+ ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
+ ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
+ NumVecElts, Ty);
+ MinMaxCost +=
+ (NumReduxLevels - LongVectorCount) *
+ (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
+ ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
+ nullptr));
+ // Need 3 extractelement instructions for scalarization + an additional
+ // scalar select instruction.
+ return ShuffleCost + MinMaxCost +
+ 3 * getScalarizationOverhead(Ty, /*Insert=*/false,
+ /*Extract=*/true) +
+ ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
+ ScalarCondTy, nullptr);
+ }
+
unsigned getVectorSplitCost() { return 1; }
/// @}
@@ -1177,7 +1283,8 @@ public:
/// \brief Concrete BasicTTIImpl that can be used if no further customization
/// is needed.
class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
- typedef BasicTTIImplBase<BasicTTIImpl> BaseT;
+ using BaseT = BasicTTIImplBase<BasicTTIImpl>;
+
friend class BasicTTIImplBase<BasicTTIImpl>;
const TargetSubtargetInfo *ST;
@@ -1190,6 +1297,6 @@ public:
explicit BasicTTIImpl(const TargetMachine *ST, const Function &F);
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_BASICTTIIMPL_H
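
As a rough illustration only: the new getMinMaxReductionCost hook added above charges one subvector-extract shuffle (doubled for pairwise reductions) plus one compare and one select per reduction level, switching to a fixed architecture-dependent width once the type becomes legal. The standalone C++ sketch below mirrors that recurrence; it is not part of the diff, all names and per-operation costs are hypothetical placeholders, and it omits the final charge the real hook adds for scalarization (three extractelements plus one scalar select).

// All per-operation costs are hypothetical placeholders standing in for the
// TTI queries the real hook makes (getShuffleCost, getCmpSelInstrCost).
struct HypotheticalCosts {
  unsigned Shuffle = 1; // ~ getShuffleCost(TTI::SK_ExtractSubvector, ...)
  unsigned Cmp = 1;     // ~ getCmpSelInstrCost(ICmp or FCmp, ...)
  unsigned Select = 1;  // ~ getCmpSelInstrCost(Select, ...)
};

// NumVecElts must be a power of two (the hook uses Log2_32(NumVecElts)).
// LegalVecElts models LT.second.getVectorNumElements() after legalization.
unsigned minMaxReductionTreeCost(unsigned NumVecElts, unsigned LegalVecElts,
                                 bool IsPairwise, HypotheticalCosts C) {
  unsigned NumReduxLevels = 0;
  for (unsigned N = NumVecElts; N > 1; N /= 2)
    ++NumReduxLevels; // == Log2_32(NumVecElts)

  unsigned Cost = 0;
  unsigned LongVectorCount = 0;
  // While the vector is wider than the legal width, each halving level pays
  // its own shuffle (doubled for pairwise reductions), compare, and select.
  while (NumVecElts > LegalVecElts) {
    NumVecElts /= 2;
    Cost += (IsPairwise + 1) * C.Shuffle + C.Cmp + C.Select;
    ++LongVectorCount;
  }
  // The remaining levels all run at the architecture-dependent legal width,
  // so they are charged uniformly, as in the hook's closing computation.
  Cost += (NumReduxLevels - LongVectorCount) *
          ((IsPairwise + 1) * C.Shuffle + C.Cmp + C.Select);
  return Cost;
}

For example, an 8-element reduction on a target whose legal width is 4 elements pays one long-vector level and two legal-width levels under this model.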