diff options
Diffstat (limited to 'llvm/lib/Target/ARM/ARMTargetTransformInfo.h')
-rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 88 |
1 files changed, 67 insertions, 21 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index 880588adfdfdd..7bf6de4bffe07 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -38,6 +38,16 @@ class ScalarEvolution; class Type; class Value; +namespace TailPredication { + enum Mode { + Disabled = 0, + EnabledNoReductions, + Enabled, + ForceEnabledNoReductions, + ForceEnabled + }; +} + class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { using BaseT = BasicTTIImplBase<ARMTTIImpl>; using TTI = TargetTransformInfo; @@ -47,13 +57,13 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { const ARMSubtarget *ST; const ARMTargetLowering *TLI; - // Currently the following features are excluded from InlineFeatureWhitelist. + // Currently the following features are excluded from InlineFeaturesAllowed. // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32 // Depending on whether they are set or unset, different // instructions/registers are available. For example, inlining a callee with // -thumb-mode in a caller with +thumb-mode, may cause the assembler to // fail if the callee uses ARM only instructions, e.g. in inline asm. - const FeatureBitset InlineFeatureWhitelist = { + const FeatureBitset InlineFeaturesAllowed = { ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb, @@ -93,11 +103,8 @@ public: bool enableInterleavedAccessVectorization() { return true; } - bool shouldFavorBackedgeIndex(const Loop *L) const { - if (L->getHeader()->getParent()->hasOptSize()) - return false; - return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1; - } + bool shouldFavorBackedgeIndex(const Loop *L) const; + bool shouldFavorPostInc() const; /// Floating-point computation using ARMv8 AArch32 Advanced /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD @@ -113,9 +120,10 @@ public: Type *Ty); using BaseT::getIntImmCost; - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty, TTI::TargetCostKind CostKind); /// @} @@ -153,31 +161,57 @@ public: return ST->getMaxInterleaveFactor(); } - bool isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment); + bool isProfitableLSRChainElement(Instruction *I); + + bool isLegalMaskedLoad(Type *DataTy, Align Alignment); - bool isLegalMaskedStore(Type *DataTy, MaybeAlign Alignment) { + bool isLegalMaskedStore(Type *DataTy, Align Alignment) { return isLegalMaskedLoad(DataTy, Alignment); } - bool isLegalMaskedGather(Type *Ty, MaybeAlign Alignment); + bool isLegalMaskedGather(Type *Ty, Align Alignment); - bool isLegalMaskedScatter(Type *Ty, MaybeAlign Alignment) { return false; } + bool isLegalMaskedScatter(Type *Ty, Align Alignment) { + return isLegalMaskedGather(Ty, Alignment); + } int getMemcpyCost(const Instruction *I); - int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); + int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, + VectorType *SubTp); bool useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const; bool shouldExpandReduction(const IntrinsicInst *II) const { - return false; + switch (II->getIntrinsicID()) { + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: + // We don't have legalization support for ordered FP reductions. + if (!II->getFastMathFlags().allowReassoc()) + return true; + // Can't legalize reductions with soft floats. + return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs(); + + case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::experimental_vector_reduce_fmax: + // Can't legalize reductions with soft floats, and NoNan will create + // fminimum which we do not know how to lower. + return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() || + !II->getFastMathFlags().noNaNs(); + + default: + // Don't expand anything else, let legalization deal with it. + return false; + } } int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); @@ -187,6 +221,7 @@ public: int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -195,13 +230,20 @@ public: const Instruction *CxtI = nullptr); int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); - int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, - ArrayRef<unsigned> Indices, unsigned Alignment, - unsigned AddressSpace, - bool UseMaskForCond = false, - bool UseMaskForGaps = false); + int getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + Align Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, + bool UseMaskForCond = false, bool UseMaskForGaps = false); + + unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + const Value *Ptr, bool VariableMask, + Align Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); bool isLoweredToCall(const Function *F); bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, @@ -217,6 +259,10 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + bool emitGetActiveLaneMask() const; + + void getPeelingPreferences(Loop *L, ScalarEvolution &SE, + TTI::PeelingPreferences &PP); bool shouldBuildLookupTablesForConstant(Constant *C) const { // In the ROPI and RWPI relocation models we can't have pointers to global // variables or functions in constant data, so don't convert switches to |