Diffstat (limited to 'llvm/lib/Analysis/TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 1386
1 file changed, 1386 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
new file mode 100644
index 000000000000..c9c294873ea6
--- /dev/null
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -0,0 +1,1386 @@
+//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfoImpl.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include <utility>
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "tti"
+
+static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
+                                     cl::Hidden,
+                                     cl::desc("Recognize reduction patterns."));
+
+namespace {
+/// No-op implementation of the TTI interface using the utility base
+/// classes.
+///
+/// This is used when no target specific information is available.
+struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
+  explicit NoTTIImpl(const DataLayout &DL)
+      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
+};
+}
+
+bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
+  // If the loop has irreducible control flow, it cannot be converted to a
+  // hardware loop.
+  LoopBlocksRPO RPOT(L);
+  RPOT.perform(&LI);
+  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
+    return false;
+  return true;
+}
+
+bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
+                                               LoopInfo &LI, DominatorTree &DT,
+                                               bool ForceNestedLoop,
+                                               bool ForceHardwareLoopPHI) {
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  for (BasicBlock *BB : ExitingBlocks) {
+    // If we pass the updated counter back through a phi, we need to know
+    // which latch the updated value will be coming from.
+    if (!L->isLoopLatch(BB)) {
+      if (ForceHardwareLoopPHI || CounterInReg)
+        continue;
+    }
+
+    const SCEV *EC = SE.getExitCount(L, BB);
+    if (isa<SCEVCouldNotCompute>(EC))
+      continue;
+    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
+      if (ConstEC->getValue()->isZero())
+        continue;
+    } else if (!SE.isLoopInvariant(EC, L))
+      continue;
+
+    if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
+      continue;
+
+    // If this exiting block is contained in a nested loop, it is not eligible
+    // for insertion of the branch-and-decrement since the inner loop would
+    // end up messing up the value in the CTR.
+    if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
+      continue;
+
+    // We now have a loop-invariant count of loop iterations (which is not the
+    // constant zero) for which we know that this loop will not exit via this
+    // exiting block.
+
+    // We need to make sure that this block will run on every loop iteration.
+    // For this to be true, we must dominate all blocks with backedges. Such
+    // blocks are in-loop predecessors to the header block.
+    bool NotAlways = false;
+    for (BasicBlock *Pred : predecessors(L->getHeader())) {
+      if (!L->contains(Pred))
+        continue;
+
+      if (!DT.dominates(BB, Pred)) {
+        NotAlways = true;
+        break;
+      }
+    }
+
+    if (NotAlways)
+      continue;
+
+    // Make sure this block ends with a conditional branch.
+    Instruction *TI = BB->getTerminator();
+    if (!TI)
+      continue;
+
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (!BI->isConditional())
+        continue;
+
+      ExitBranch = BI;
+    } else
+      continue;
+
+    // Note that this block may not be the loop latch block, even if the loop
+    // has a latch block.
+    ExitBlock = BB;
+    ExitCount = EC;
+    break;
+  }
+
+  if (!ExitBlock)
+    return false;
+  return true;
+}
+
+TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
+    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
+
+TargetTransformInfo::~TargetTransformInfo() {}
+
+TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
+    : TTIImpl(std::move(Arg.TTIImpl)) {}
+
+TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
+  TTIImpl = std::move(RHS.TTIImpl);
+  return *this;
+}
+
+int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
+                                          Type *OpTy) const {
+  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
+                                     const User *U) const {
+  int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getCallCost(const Function *F,
+                                     ArrayRef<const Value *> Arguments,
+                                     const User *U) const {
+  int Cost = TTIImpl->getCallCost(F, Arguments, U);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
+  return TTIImpl->getInliningThresholdMultiplier();
+}
+
+int TargetTransformInfo::getInlinerVectorBonusPercent() const {
+  return TTIImpl->getInlinerVectorBonusPercent();
+}
+
+int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
+                                    ArrayRef<const Value *> Operands) const {
+  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
+}
+
+int TargetTransformInfo::getExtCost(const Instruction *I,
+                                    const Value *Src) const {
+  return TTIImpl->getExtCost(I, Src);
+}
+
+int TargetTransformInfo::getIntrinsicCost(
+    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
+    const User *U) const {
+  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+unsigned
+TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
+                                                      unsigned &JTSize) const {
+  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
+}
+
+int TargetTransformInfo::getUserCost(const User *U,
+                                     ArrayRef<const Value *> Operands) const {
+  int Cost = TTIImpl->getUserCost(U, Operands);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+bool TargetTransformInfo::hasBranchDivergence() const {
+  return TTIImpl->hasBranchDivergence();
+}
+
+bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
+  return TTIImpl->isSourceOfDivergence(V);
+}
+
+bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
+  return TTIImpl->isAlwaysUniform(V);
+}
+
+unsigned TargetTransformInfo::getFlatAddressSpace() const {
+  return TTIImpl->getFlatAddressSpace();
+}
+
+bool TargetTransformInfo::collectFlatAddressOperands(
+    SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
+  return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
+}
+
+bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
+    IntrinsicInst *II, Value *OldV, Value *NewV) const {
+  return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
+}
+
+bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
+  return TTIImpl->isLoweredToCall(F);
+}
+
+bool TargetTransformInfo::isHardwareLoopProfitable(
+    Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
+    TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
+  return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+}
+
+void TargetTransformInfo::getUnrollingPreferences(
+    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
+  return TTIImpl->getUnrollingPreferences(L, SE, UP);
+}
+
+bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
+  return TTIImpl->isLegalAddImmediate(Imm);
+}
+
+bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
+  return TTIImpl->isLegalICmpImmediate(Imm);
+}
+
+bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+                                                int64_t BaseOffset,
+                                                bool HasBaseReg,
+                                                int64_t Scale,
+                                                unsigned AddrSpace,
+                                                Instruction *I) const {
+  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
+                                        Scale, AddrSpace, I);
+}
+
+bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
+  return TTIImpl->isLSRCostLess(C1, C2);
+}
+
+bool TargetTransformInfo::canMacroFuseCmp() const {
+  return TTIImpl->canMacroFuseCmp();
+}
+
+bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
+                                     ScalarEvolution *SE, LoopInfo *LI,
+                                     DominatorTree *DT, AssumptionCache *AC,
+                                     TargetLibraryInfo *LibInfo) const {
+  return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
+}
+
+bool TargetTransformInfo::shouldFavorPostInc() const {
+  return TTIImpl->shouldFavorPostInc();
+}
+
+bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
+  return TTIImpl->shouldFavorBackedgeIndex(L);
+}
+
+bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
+                                             MaybeAlign Alignment) const {
+  return TTIImpl->isLegalMaskedStore(DataType, Alignment);
+}
+
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
+                                            MaybeAlign Alignment) const {
+  return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
+}
+
+bool TargetTransformInfo::isLegalNTStore(Type *DataType,
+                                         Align Alignment) const {
+  return TTIImpl->isLegalNTStore(DataType, Alignment);
+}
+
+bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
+  return TTIImpl->isLegalNTLoad(DataType, Alignment);
+}
+
+bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
+  return TTIImpl->isLegalMaskedGather(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
+  return TTIImpl->isLegalMaskedScatter(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
+  return TTIImpl->isLegalMaskedCompressStore(DataType);
+}
+
+bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
+  return TTIImpl->isLegalMaskedExpandLoad(DataType);
+}
+
+bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
+  return TTIImpl->hasDivRemOp(DataType, IsSigned);
+}
+
+bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
+                                             unsigned AddrSpace) const {
+  return TTIImpl->hasVolatileVariant(I, AddrSpace);
+}
+
+bool TargetTransformInfo::prefersVectorizedAddressing() const {
+  return TTIImpl->prefersVectorizedAddressing();
+}
+
+int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
+                                              int64_t BaseOffset,
+                                              bool HasBaseReg,
+                                              int64_t Scale,
+                                              unsigned AddrSpace) const {
+  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
+                                           Scale, AddrSpace);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+bool TargetTransformInfo::LSRWithInstrQueries() const {
+  return TTIImpl->LSRWithInstrQueries();
+}
+
+bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
+  return TTIImpl->isTruncateFree(Ty1, Ty2);
+}
+
+bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
+  return TTIImpl->isProfitableToHoist(I);
+}
+
+bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }
+
+bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
+  return TTIImpl->isTypeLegal(Ty);
+}
+
+bool TargetTransformInfo::shouldBuildLookupTables() const {
+  return TTIImpl->shouldBuildLookupTables();
+}
+bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
+  return TTIImpl->shouldBuildLookupTablesForConstant(C);
+}
+
+bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
+  return TTIImpl->useColdCCForColdCall(F);
+}
+
+unsigned TargetTransformInfo::
+getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
+  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
+}
+
+unsigned TargetTransformInfo::
+getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+                                 unsigned VF) const {
+  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
+}
+
+bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
+  return TTIImpl->supportsEfficientVectorElementLoadStore();
+}
+
+bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
+  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
+}
+
+TargetTransformInfo::MemCmpExpansionOptions
+TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
+}
+
+bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
+  return TTIImpl->enableInterleavedAccessVectorization();
+}
+
+bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
+  return TTIImpl->enableMaskedInterleavedAccessVectorization();
+}
+
+bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
+  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
+}
+
+bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
+                                                         unsigned BitWidth,
+                                                         unsigned AddressSpace,
+                                                         unsigned Alignment,
+                                                         bool *Fast) const {
+  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
+                                                 Alignment, Fast);
+}
+
+TargetTransformInfo::PopcntSupportKind
+TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
+  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
+}
+
+bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
+  return TTIImpl->haveFastSqrt(Ty);
+}
+
+bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
+  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
+}
+
+int TargetTransformInfo::getFPOpCost(Type *Ty) const {
+  int Cost = TTIImpl->getFPOpCost(Ty);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
+                                               const APInt &Imm,
+                                               Type *Ty) const {
+  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
+  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
+                                       const APInt &Imm, Type *Ty) const {
+  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+                                       const APInt &Imm, Type *Ty) const {
+  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
+  return TTIImpl->getNumberOfRegisters(ClassID);
+}
+
+unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const {
+  return TTIImpl->getRegisterClassForType(Vector, Ty);
+}
+
+const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
+  return TTIImpl->getRegisterClassName(ClassID);
+}
+
+unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
+  return TTIImpl->getRegisterBitWidth(Vector);
+}
+
+unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
+  return TTIImpl->getMinVectorRegisterBitWidth();
+}
+
+bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
+  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
+}
+
+unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
+  return TTIImpl->getMinimumVF(ElemWidth);
+}
+
+bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
+    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
+  return TTIImpl->shouldConsiderAddressTypePromotion(
+      I, AllowPromotionWithoutCommonHeader);
+}
+
+unsigned TargetTransformInfo::getCacheLineSize() const {
+  return TTIImpl->getCacheLineSize();
+}
+
+llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
+    const {
+  return TTIImpl->getCacheSize(Level);
+}
+
+llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
+    CacheLevel Level) const {
+  return TTIImpl->getCacheAssociativity(Level);
+}
+
+unsigned TargetTransformInfo::getPrefetchDistance() const {
+  return TTIImpl->getPrefetchDistance();
+}
+
+unsigned TargetTransformInfo::getMinPrefetchStride() const {
+  return TTIImpl->getMinPrefetchStride();
+}
+
+unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
+  return TTIImpl->getMaxPrefetchIterationsAhead();
+}
+
+unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
+  return TTIImpl->getMaxInterleaveFactor(VF);
+}
+
+TargetTransformInfo::OperandValueKind
+TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
+  OperandValueKind OpInfo = OK_AnyValue;
+  OpProps = OP_None;
+
+  if (auto *CI = dyn_cast<ConstantInt>(V)) {
+    if (CI->getValue().isPowerOf2())
+      OpProps = OP_PowerOf2;
+    return OK_UniformConstantValue;
+  }
+
+  // A broadcast shuffle creates a uniform value.
+  // TODO: Add support for non-zero index broadcasts.
+  // TODO: Add support for different source vector width.
+  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
+    if (ShuffleInst->isZeroEltSplat())
+      OpInfo = OK_UniformValue;
+
+  const Value *Splat = getSplatValue(V);
+
+  // Check for a splat of a constant or for a non-uniform vector of constants
+  // and check if the constant(s) are all powers of two.
+  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
+    OpInfo = OK_NonUniformConstantValue;
+    if (Splat) {
+      OpInfo = OK_UniformConstantValue;
+      if (auto *CI = dyn_cast<ConstantInt>(Splat))
+        if (CI->getValue().isPowerOf2())
+          OpProps = OP_PowerOf2;
+    } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
+      OpProps = OP_PowerOf2;
+      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
+          if (CI->getValue().isPowerOf2())
+            continue;
+        OpProps = OP_None;
+        break;
+      }
+    }
+  }
+
+  // Check for a splat of a uniform value. This is not loop aware, so return
+  // true only for the obviously uniform cases (argument, globalvalue).
+  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
+    OpInfo = OK_UniformValue;
+
+  return OpInfo;
+}
+
+int TargetTransformInfo::getArithmeticInstrCost(
+    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+    OperandValueProperties Opd2PropInfo,
+    ArrayRef<const Value *> Args) const {
+  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+                                             Opd1PropInfo, Opd2PropInfo, Args);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
+                                        Type *SubTp) const {
+  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
+                                          Type *Src, const Instruction *I) const {
+  assert ((I == nullptr || I->getOpcode() == Opcode) &&
+          "Opcode should reflect passed instruction.");
+  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+                                                  VectorType *VecTy,
+                                                  unsigned Index) const {
+  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
+  int Cost = TTIImpl->getCFInstrCost(Opcode);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                            Type *CondTy, const Instruction *I) const {
+  assert ((I == nullptr || I->getOpcode() == Opcode) &&
+          "Opcode should reflect passed instruction.");
+  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
+                                            unsigned Index) const {
+  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
+  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  return Cost;
+}
+
+int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                         unsigned Alignment,
+                                         unsigned AddressSpace,
+                                         const Instruction *I) const {
+  assert ((I == nullptr || I->getOpcode() == Opcode) &&
instruction."); + int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + int Cost = + TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) const { + int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, + Alignment); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond, + bool UseMaskForGaps) const { + int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace, + UseMaskForCond, + UseMaskForGaps); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Type *> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF, + ScalarizationCostPassed); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys) const { + int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { + return TTIImpl->getNumberOfParts(Tp); +} + +int TargetTransformInfo::getAddressComputationCost(Type *Tp, + ScalarEvolution *SE, + const SCEV *Ptr) const { + int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getMemcpyCost(const Instruction *I) const { + int Cost = TTIImpl->getMemcpyCost(I); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) const { + int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy, + bool IsPairwiseForm, + bool IsUnsigned) const { + int Cost = + TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +unsigned +TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { + return TTIImpl->getCostOfKeepingLiveOverCall(Tys); +} + +bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst, + MemIntrinsicInfo &Info) const { + return 
+  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
+}
+
+unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
+  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
+}
+
+Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
+    IntrinsicInst *Inst, Type *ExpectedType) const {
+  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
+}
+
+Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
+                                                     Value *Length,
+                                                     unsigned SrcAlign,
+                                                     unsigned DestAlign) const {
+  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
+                                            DestAlign);
+}
+
+void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
+    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
+    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
+  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
+                                             SrcAlign, DestAlign);
+}
+
+bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
+                                              const Function *Callee) const {
+  return TTIImpl->areInlineCompatible(Caller, Callee);
+}
+
+bool TargetTransformInfo::areFunctionArgsABICompatible(
+    const Function *Caller, const Function *Callee,
+    SmallPtrSetImpl<Argument *> &Args) const {
+  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
+}
+
+bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
+                                             Type *Ty) const {
+  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
+}
+
+bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
+                                              Type *Ty) const {
+  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
+}
+
+unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
+  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
+  return TTIImpl->isLegalToVectorizeLoad(LI);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
+  return TTIImpl->isLegalToVectorizeStore(SI);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeLoadChain(
+    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
+  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
+                                              AddrSpace);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeStoreChain(
+    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
+  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
+                                               AddrSpace);
+}
+
+unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
+                                                  unsigned LoadSize,
+                                                  unsigned ChainSizeInBytes,
+                                                  VectorType *VecTy) const {
+  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
+}
+
+unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
+                                                   unsigned StoreSize,
+                                                   unsigned ChainSizeInBytes,
+                                                   VectorType *VecTy) const {
+  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
+}
+
+bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
+                                                Type *Ty, ReductionFlags Flags) const {
+  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
+}
+
+bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
+  return TTIImpl->shouldExpandReduction(II);
+}
+
+unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
+  return TTIImpl->getGISelRematGlobalCost();
+}
+
+int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
+  return TTIImpl->getInstructionLatency(I);
+}
+
+static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
+                                     unsigned Level) {
+  // We don't need a shuffle if we just want to have element 0 in position 0 of
+  // the vector.
+  if (!SI && Level == 0 && IsLeft)
+    return true;
+  else if (!SI)
+    return false;
+
+  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
+
+  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
+  // we look at the left or right side.
+  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
+    Mask[i] = val;
+
+  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
+  return Mask == ActualMask;
+}
+
+namespace {
+/// Kind of the reduction data.
+enum ReductionKind {
+  RK_None,           /// Not a reduction.
+  RK_Arithmetic,     /// Binary reduction data.
+  RK_MinMax,         /// Min/max reduction data.
+  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
+};
+/// Contains opcode + LHS/RHS parts of the reduction operations.
+struct ReductionData {
+  ReductionData() = delete;
+  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
+      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
+    assert(Kind != RK_None && "expected binary or min/max reduction only.");
+  }
+  unsigned Opcode = 0;
+  Value *LHS = nullptr;
+  Value *RHS = nullptr;
+  ReductionKind Kind = RK_None;
+  bool hasSameData(ReductionData &RD) const {
+    return Kind == RD.Kind && Opcode == RD.Opcode;
+  }
+};
+} // namespace
+
+static Optional<ReductionData> getReductionData(Instruction *I) {
+  Value *L, *R;
+  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
+    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
+  if (auto *SI = dyn_cast<SelectInst>(I)) {
+    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
+        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
+        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
+        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
+        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
+        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
+      auto *CI = cast<CmpInst>(SI->getCondition());
+      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
+    }
+    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
+        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
+      auto *CI = cast<CmpInst>(SI->getCondition());
+      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
+    }
+  }
+  return llvm::None;
+}
+
+static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
+                                                   unsigned Level,
+                                                   unsigned NumLevels) {
+  // Match one level of pairwise operations.
+  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //                 <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //                 <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+  if (!I)
+    return RK_None;
+
+  assert(I->getType()->isVectorTy() && "Expecting a vector type");
+
+  Optional<ReductionData> RD = getReductionData(I);
+  if (!RD)
+    return RK_None;
+
+  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
+  if (!LS && Level)
+    return RK_None;
+  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
+  if (!RS && Level)
+    return RK_None;
+
+  // On level 0 we can omit one shufflevector instruction.
+  if (!Level && !RS && !LS)
+    return RK_None;
+
+  // Shuffle inputs must match.
+  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
+  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
+  Value *NextLevelOp = nullptr;
+  if (NextLevelOpR && NextLevelOpL) {
+    // If we have two shuffles, their operands must match.
+    if (NextLevelOpL != NextLevelOpR)
+      return RK_None;
+
+    NextLevelOp = NextLevelOpL;
+  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
+    // On the first level we can omit the shufflevector <0, undef,...>. So the
+    // input to the other shufflevector <1, undef> must match with one of the
+    // inputs to the current binary operation.
+    // Example:
+    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
+    //  %BinOp        = fadd          %NextLevelOpL, %R
+    if (NextLevelOpL && NextLevelOpL != RD->RHS)
+      return RK_None;
+    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
+      return RK_None;
+
+    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
+  } else
+    return RK_None;
+
+  // Check that the next level's binary operation exists and matches the
+  // current one.
+  if (Level + 1 != NumLevels) {
+    Optional<ReductionData> NextLevelRD =
+        getReductionData(cast<Instruction>(NextLevelOp));
+    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
+      return RK_None;
+  }
+
+  // Shuffle mask for pairwise operation must match.
+  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
+    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
+      return RK_None;
+  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
+    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
+      return RK_None;
+  } else {
+    return RK_None;
+  }
+
+  if (++Level == NumLevels)
+    return RD->Kind;
+
+  // Match next level.
+  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
+                                       NumLevels);
+}
+
+static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
+                                            unsigned &Opcode, Type *&Ty) {
+  if (!EnableReduxCost)
+    return RK_None;
+
+  // Need to extract the first element.
+  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+  unsigned Idx = ~0u;
+  if (CI)
+    Idx = CI->getZExtValue();
+  if (Idx != 0)
+    return RK_None;
+
+  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
+  if (!RdxStart)
+    return RK_None;
+  Optional<ReductionData> RD = getReductionData(RdxStart);
+  if (!RD)
+    return RK_None;
+
+  Type *VecTy = RdxStart->getType();
+  unsigned NumVecElems = VecTy->getVectorNumElements();
+  if (!isPowerOf2_32(NumVecElems))
+    return RK_None;
+
+  // We look for a sequence of shuffle, shuffle, add triples like the following
+  // that builds a pairwise reduction tree.
+  //
+  //  (X0, X1, X2, X3)
+  //   (X0 + X1, X2 + X3, undef, undef)
+  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
+  //
+  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //                 <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //                 <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+  //                 <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+  //                 <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+  // %r = extractelement <4 x float> %bin.rdx8, i32 0
+  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
+      RK_None)
+    return RK_None;
+
+  Opcode = RD->Opcode;
+  Ty = VecTy;
+
+  return RD->Kind;
+}
+
+static std::pair<Value *, ShuffleVectorInst *>
+getShuffleAndOtherOprd(Value *L, Value *R) {
+  ShuffleVectorInst *S = nullptr;
+
+  if ((S = dyn_cast<ShuffleVectorInst>(L)))
+    return std::make_pair(R, S);
+
+  S = dyn_cast<ShuffleVectorInst>(R);
+  return std::make_pair(L, S);
+}
+
+static ReductionKind
+matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
+                              unsigned &Opcode, Type *&Ty) {
+  if (!EnableReduxCost)
+    return RK_None;
+
+  // Need to extract the first element.
+  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+  unsigned Idx = ~0u;
+  if (CI)
+    Idx = CI->getZExtValue();
+  if (Idx != 0)
+    return RK_None;
+
+  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
+  if (!RdxStart)
+    return RK_None;
+  Optional<ReductionData> RD = getReductionData(RdxStart);
+  if (!RD)
+    return RK_None;
+
+  Type *VecTy = ReduxRoot->getOperand(0)->getType();
+  unsigned NumVecElems = VecTy->getVectorNumElements();
+  if (!isPowerOf2_32(NumVecElems))
+    return RK_None;
+
+  // We look for a sequence of shuffles and adds like the following, matching
+  // one fadd, shuffle vector pair at a time.
+  //
+  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //             <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
+  //              <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+  // %r = extractelement <4 x float> %bin.rdx8, i32 0
+
+  unsigned MaskStart = 1;
+  Instruction *RdxOp = RdxStart;
+  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
+  unsigned NumVecElemsRemain = NumVecElems;
+  while (NumVecElemsRemain - 1) {
+    // Check for the right reduction operation.
+    if (!RdxOp)
+      return RK_None;
+    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
+    if (!RDLevel || !RDLevel->hasSameData(*RD))
+      return RK_None;
+
+    Value *NextRdxOp;
+    ShuffleVectorInst *Shuffle;
+    std::tie(NextRdxOp, Shuffle) =
+        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);
+
+    // Check that the current reduction operation and the shuffle use the same
+    // value.
+    if (Shuffle == nullptr)
+      return RK_None;
+    if (Shuffle->getOperand(0) != NextRdxOp)
+      return RK_None;
+
+    // Check that the shuffle masks match.
+    for (unsigned j = 0; j != MaskStart; ++j)
+      ShuffleMask[j] = MaskStart + j;
+    // Fill the rest of the mask with -1 for undef.
+    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
+
+    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+    if (ShuffleMask != Mask)
+      return RK_None;
+
+    RdxOp = dyn_cast<Instruction>(NextRdxOp);
+    NumVecElemsRemain /= 2;
+    MaskStart *= 2;
+  }
+
+  Opcode = RD->Opcode;
+  Ty = VecTy;
+  return RD->Kind;
+}
+
+int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
+  switch (I->getOpcode()) {
+  case Instruction::GetElementPtr:
+    return getUserCost(I);
+
+  case Instruction::Ret:
+  case Instruction::PHI:
+  case Instruction::Br: {
+    return getCFInstrCost(I->getOpcode());
+  }
+  case Instruction::Add:
+  case Instruction::FAdd:
+  case Instruction::Sub:
+  case Instruction::FSub:
+  case Instruction::Mul:
+  case Instruction::FMul:
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::FDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::FRem:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor: {
+    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
+    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
+    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
+    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
+    SmallVector<const Value *, 2> Operands(I->operand_values());
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+                                  Op1VP, Op2VP, Operands);
+  }
+  case Instruction::FNeg: {
+    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
+    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
+    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
+    Op2VK = OK_AnyValue;
+    Op2VP = OP_None;
+    SmallVector<const Value *, 2> Operands(I->operand_values());
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+                                  Op1VP, Op2VP, Operands);
+  }
+  case Instruction::Select: {
+    const SelectInst *SI = cast<SelectInst>(I);
+    Type *CondTy = SI->getCondition()->getType();
+    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
+  }
+  case Instruction::ICmp:
+  case Instruction::FCmp: {
+    Type *ValTy = I->getOperand(0)->getType();
+    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
+  }
+  case Instruction::Store: {
+    const StoreInst *SI = cast<StoreInst>(I);
+    Type *ValTy = SI->getValueOperand()->getType();
+    return getMemoryOpCost(I->getOpcode(), ValTy,
+                           SI->getAlignment(),
+                           SI->getPointerAddressSpace(), I);
+  }
+  case Instruction::Load: {
+    const LoadInst *LI = cast<LoadInst>(I);
+    return getMemoryOpCost(I->getOpcode(), I->getType(),
+                           LI->getAlignment(),
+                           LI->getPointerAddressSpace(), I);
+  }
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::FPExt:
+  case Instruction::PtrToInt:
+  case Instruction::IntToPtr:
+  case Instruction::SIToFP:
+  case Instruction::UIToFP:
+  case Instruction::Trunc:
+  case Instruction::FPTrunc:
+  case Instruction::BitCast:
+  case Instruction::AddrSpaceCast: {
+    Type *SrcTy = I->getOperand(0)->getType();
+    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
+  }
+  case Instruction::ExtractElement: {
+    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
+    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+    unsigned Idx = -1;
+    if (CI)
+      Idx = CI->getZExtValue();
+
+    // Try to match a reduction sequence (series of shufflevector and vector
+    // adds followed by an extractelement).
+    unsigned ReduxOpCode;
+    Type *ReduxType;
+
+    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
+    case RK_Arithmetic:
+      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
+                                        /*IsPairwiseForm=*/false);
+    case RK_MinMax:
+      return getMinMaxReductionCost(
+          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
+    case RK_UnsignedMinMax:
+      return getMinMaxReductionCost(
+          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
+    case RK_None:
+      break;
+    }
+
+    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
+    case RK_Arithmetic:
+      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
+                                        /*IsPairwiseForm=*/true);
+    case RK_MinMax:
+      return getMinMaxReductionCost(
+          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
+    case RK_UnsignedMinMax:
+      return getMinMaxReductionCost(
+          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
+    case RK_None:
+      break;
+    }
+
+    return getVectorInstrCost(I->getOpcode(),
+                              EEI->getOperand(0)->getType(), Idx);
+  }
+  case Instruction::InsertElement: {
+    const InsertElementInst *IE = cast<InsertElementInst>(I);
+    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+    unsigned Idx = -1;
+    if (CI)
+      Idx = CI->getZExtValue();
+    return getVectorInstrCost(I->getOpcode(),
+                              IE->getType(), Idx);
+  }
+  case Instruction::ExtractValue:
+    return 0; // Model all ExtractValue nodes as free.
+  case Instruction::ShuffleVector: {
+    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+    Type *Ty = Shuffle->getType();
+    Type *SrcTy = Shuffle->getOperand(0)->getType();
+
+    // TODO: Identify and add costs for insert subvector, etc.
+    int SubIndex;
+    if (Shuffle->isExtractSubvectorMask(SubIndex))
+      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);
+
+    if (Shuffle->changesLength())
+      return -1;
+
+    if (Shuffle->isIdentity())
+      return 0;
+
+    if (Shuffle->isReverse())
+      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);
+
+    if (Shuffle->isSelect())
+      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);
+
+    if (Shuffle->isTranspose())
+      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);
+
+    if (Shuffle->isZeroEltSplat())
+      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);
+
+    if (Shuffle->isSingleSource())
+      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);
+
+    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
+  }
+  case Instruction::Call:
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      SmallVector<Value *, 4> Args(II->arg_operands());
+
+      FastMathFlags FMF;
+      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+        FMF = FPMO->getFastMathFlags();
+
+      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
+                                   Args, FMF);
+    }
+    return -1;
+  default:
+    // We don't have any information on this instruction.
+    return -1;
+  }
+}
+
+TargetTransformInfo::Concept::~Concept() {}
+
+TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
+
+TargetIRAnalysis::TargetIRAnalysis(
+    std::function<Result(const Function &)> TTICallback)
+    : TTICallback(std::move(TTICallback)) {}
+
+TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
+                                               FunctionAnalysisManager &) {
+  return TTICallback(F);
+}
+
+AnalysisKey TargetIRAnalysis::Key;
+
+TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
+  return Result(F.getParent()->getDataLayout());
+}
+
+// Register the basic pass.
+INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
+                "Target Transform Information", false, true)
+char TargetTransformInfoWrapperPass::ID = 0;
+
+void TargetTransformInfoWrapperPass::anchor() {}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
+    : ImmutablePass(ID) {
+  initializeTargetTransformInfoWrapperPassPass(
+      *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
+    TargetIRAnalysis TIRA)
+    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
+  initializeTargetTransformInfoWrapperPassPass(
+      *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
+  FunctionAnalysisManager DummyFAM;
+  TTI = TIRA.run(F, DummyFAM);
+  return *TTI;
+}
+
+ImmutablePass *
+llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
+  return new TargetTransformInfoWrapperPass(std::move(TIRA));
+}
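
For context, the sketch below shows how the interface moved in this commit is typically consumed. It is a minimal illustration, not part of the commit: the helper name sumThroughputCost is hypothetical, and a real client would usually obtain its TargetTransformInfo through a target-aware TargetIRAnalysis callback or TargetTransformInfoWrapperPass rather than the conservative NoTTIImpl fallback used here.

// Hypothetical consumer sketch (not part of this commit). With no callback,
// TargetIRAnalysis falls back to getDefaultTTI() and thus to the NoTTIImpl
// defined at the top of the file.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Sum the reciprocal-throughput cost of every instruction in F.
static int sumThroughputCost(Function &F) {
  TargetIRAnalysis TIRA;           // no target callback: NoTTIImpl defaults
  FunctionAnalysisManager DummyFAM;
  TargetTransformInfo TTI = TIRA.run(F, DummyFAM);

  int Total = 0;
  for (Instruction &I : instructions(F)) {
    // TCK_RecipThroughput dispatches to getInstructionThroughput() above;
    // -1 means the cost model has no information for this instruction.
    int Cost =
        TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
    if (Cost >= 0)
      Total += Cost;
  }
  return Total;
}

Note that the reduction matching in getInstructionThroughput() is gated behind the hidden -costmodel-reduxcost flag (EnableReduxCost above), so shufflevector/extractelement reduction trees are costed as ordinary vector instructions unless that flag is set.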