| author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
| commit | 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch) | |
| tree | 5343938942df402b49ec7300a1c25a2d4ccd5821 /lib/Target/X86/X86TargetTransformInfo.cpp | |
| parent | 31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff) | |
Diffstat (limited to 'lib/Target/X86/X86TargetTransformInfo.cpp')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/Target/X86/X86TargetTransformInfo.cpp | 106 |

1 file changed, 73 insertions(+), 33 deletions(-)
```diff
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 5715d826862e..b742fb472372 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -78,7 +78,7 @@ unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
   return 8;
 }
 
-unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
+unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) const {
   if (Vector) {
     if (ST->hasAVX512())
       return 512;
@@ -95,6 +95,10 @@ unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
   return 32;
 }
 
+unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
+  return getRegisterBitWidth(true);
+}
+
 unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
   // If the loop will not be vectorized, don't interleave the loop.
   // Let regular unroll to unroll the loop, which saves the overflow
@@ -114,7 +118,7 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
 }
 
 int X86TTIImpl::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty,
+    unsigned Opcode, Type *Ty,
     TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
     TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo,
@@ -207,6 +211,10 @@ int X86TTIImpl::getArithmeticInstrCost(
   }
 
   static const CostTblEntry AVX512UniformConstCostTable[] = {
+    { ISD::SRA,  MVT::v2i64,   1 },
+    { ISD::SRA,  MVT::v4i64,   1 },
+    { ISD::SRA,  MVT::v8i64,   1 },
+
     { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
     { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
   };
@@ -319,6 +327,14 @@ int X86TTIImpl::getArithmeticInstrCost(
     return LT.first * Entry->Cost;
 
   static const CostTblEntry AVX512BWCostTable[] = {
+    { ISD::SHL, MVT::v8i16,  1 }, // vpsllvw
+    { ISD::SRL, MVT::v8i16,  1 }, // vpsrlvw
+    { ISD::SRA, MVT::v8i16,  1 }, // vpsravw
+
+    { ISD::SHL, MVT::v16i16, 1 }, // vpsllvw
+    { ISD::SRL, MVT::v16i16, 1 }, // vpsrlvw
+    { ISD::SRA, MVT::v16i16, 1 }, // vpsravw
+
     { ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
     { ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
     { ISD::SRA, MVT::v32i16, 1 }, // vpsravw
@@ -347,8 +363,12 @@ int X86TTIImpl::getArithmeticInstrCost(
     { ISD::SHL, MVT::v16i32, 1 },
     { ISD::SRL, MVT::v16i32, 1 },
     { ISD::SRA, MVT::v16i32, 1 },
+
     { ISD::SHL, MVT::v8i64,  1 },
     { ISD::SRL, MVT::v8i64,  1 },
+
+    { ISD::SRA, MVT::v2i64,  1 },
+    { ISD::SRA, MVT::v4i64,  1 },
     { ISD::SRA, MVT::v8i64,  1 },
 
     { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
@@ -595,7 +615,6 @@ int X86TTIImpl::getArithmeticInstrCost(
     { ISD::SHL,  MVT::v16i8,  26 }, // cmpgtb sequence.
     { ISD::SHL,  MVT::v8i16,  32 }, // cmpgtb sequence.
     { ISD::SHL,  MVT::v4i32,  2*5 }, // We optimized this using mul.
-    { ISD::SHL,  MVT::v8i32,  2*2*5 }, // We optimized this using mul.
     { ISD::SHL,  MVT::v2i64,  4 }, // splat+shuffle sequence.
     { ISD::SHL,  MVT::v4i64,  2*4 }, // splat+shuffle sequence.
```
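The hunks above extend X86's per-ISA cost tables: AVX-512 gains unit-cost 64-bit vector arithmetic shifts, AVX-512BW gains unit-cost variable 16-bit shifts (`vpsllvw`/`vpsrlvw`/`vpsravw`) at the narrower vector widths, and a stale SSE2 `v8i32` entry is dropped. Every such table is consulted through `CostTableLookup`, a first-match linear scan. Below is a minimal self-contained sketch of that pattern; `CostTblEntry` and `CostTableLookup` imitate the shape of `llvm/CodeGen/CostTable.h`, but the enums and table contents are simplified stand-ins, not the real LLVM definitions.

```cpp
// Minimal sketch of the cost-table lookup pattern used throughout this file.
// The enums and costs are illustrative stand-ins, not LLVM's definitions.
#include <cstdio>

enum ISDOpcode { SHL, SRL, SRA };
enum SimpleVT { v8i16, v16i16, v32i16 };

struct CostTblEntry {
  ISDOpcode ISD;
  SimpleVT Type;
  int Cost;
};

// Linear scan, first match wins; a miss returns nullptr so the caller can
// fall through to the next (older) ISA table.
template <unsigned N>
const CostTblEntry *CostTableLookup(const CostTblEntry (&Tbl)[N],
                                    ISDOpcode ISD, SimpleVT VT) {
  for (const CostTblEntry &E : Tbl)
    if (E.ISD == ISD && E.Type == VT)
      return &E;
  return nullptr;
}

static const CostTblEntry AVX512BWCostTable[] = {
    {SHL, v8i16, 1}, // vpsllvw
    {SRL, v8i16, 1}, // vpsrlvw
    {SRA, v8i16, 1}, // vpsravw
};

int main() {
  if (const CostTblEntry *Entry = CostTableLookup(AVX512BWCostTable, SRA, v8i16))
    std::printf("v8i16 SRA cost: %d\n", Entry->Cost); // prints 1
  return 0;
}
```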
```diff
@@ -804,7 +823,14 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     { TTI::SK_Reverse, MVT::v32i8,  2 }, // vperm2i128 + pshufb
 
     { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
-    { TTI::SK_Alternate, MVT::v32i8,  1 }  // vpblendvb
+    { TTI::SK_Alternate, MVT::v32i8,  1 }, // vpblendvb
+
+    { TTI::SK_PermuteSingleSrc, MVT::v4i64,  1 }, // vpermq
+    { TTI::SK_PermuteSingleSrc, MVT::v8i32,  1 }, // vpermd
+    { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2 * vpshufb
+                                                  // + vpblendvb
+    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  4 }  // vperm2i128 + 2 * vpshufb
+                                                  // + vpblendvb
   };
 
   if (ST->hasAVX2())
@@ -861,7 +887,10 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
 
     { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
-    { TTI::SK_Alternate, MVT::v16i8, 3 }  // pshufb + pshufb + por
+    { TTI::SK_Alternate, MVT::v16i8, 3 }, // pshufb + pshufb + por
+
+    { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
+    { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }  // pshufb
   };
 
   if (ST->hasSSSE3())
@@ -886,7 +915,10 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
     { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
     { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
-    { TTI::SK_Alternate, MVT::v16i8, 3 }  // pand + pandn + por
+    { TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por
+
+    { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
+    { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }  // pshufd
   };
 
   if (ST->hasSSE2())
@@ -906,7 +938,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
   return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
 }
 
-int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                 const Instruction *I) {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
@@ -1272,7 +1305,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
   return BaseT::getCastInstrCost(Opcode, Dst, Src);
 }
 
-int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                   const Instruction *I) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -1338,11 +1372,12 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
   if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
     return LT.first * Entry->Cost;
 
-  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 }
 
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                                      ArrayRef<Type *> Tys, FastMathFlags FMF) {
+                                      ArrayRef<Type *> Tys, FastMathFlags FMF,
+                                      unsigned ScalarizationCostPassed) {
   // Costs should match the codegen from:
   // BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
   // BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
@@ -1418,8 +1453,8 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
     { ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/
   };
   static const CostTblEntry SSE42CostTbl[] = {
-    { ISD::FSQRT, MVT::f32,   18 }, // Nehalem from http://www.agner.org/
-    { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
+    { ISD::FSQRT, MVT::f32,   18 }, // Nehalem from http://www.agner.org/
+    { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
   };
   static const CostTblEntry SSSE3CostTbl[] = {
     { ISD::BITREVERSE, MVT::v2i64, 5 },
@@ -1443,6 +1478,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
     { ISD::CTTZ, MVT::v16i8,  9 }
   };
   static const CostTblEntry SSE2CostTbl[] = {
+    { ISD::BITREVERSE, MVT::v2i64, 29 },
+    { ISD::BITREVERSE, MVT::v4i32, 27 },
+    { ISD::BITREVERSE, MVT::v8i16, 27 },
+    { ISD::BITREVERSE, MVT::v16i8, 20 },
     { ISD::BSWAP, MVT::v2i64, 7 },
     { ISD::BSWAP, MVT::v4i32, 7 },
     { ISD::BSWAP, MVT::v8i16, 7 },
@@ -1462,8 +1501,16 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
     { ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
   };
   static const CostTblEntry SSE1CostTbl[] = {
-    { ISD::FSQRT, MVT::f32,   28 }, // Pentium III from http://www.agner.org/
-    { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
+    { ISD::FSQRT, MVT::f32,   28 }, // Pentium III from http://www.agner.org/
+    { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
+  };
+  static const CostTblEntry X64CostTbl[] = { // 64-bit targets
+    { ISD::BITREVERSE, MVT::i64, 14 }
+  };
+  static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
+    { ISD::BITREVERSE, MVT::i32, 14 },
+    { ISD::BITREVERSE, MVT::i16, 14 },
+    { ISD::BITREVERSE, MVT::i8,  11 }
   };
 
   unsigned ISD = ISD::DELETED_NODE;
@@ -1523,12 +1570,19 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
   if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
     return LT.first * Entry->Cost;
 
-  return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF);
+  if (ST->is64Bit())
+    if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
+      return LT.first * Entry->Cost;
+
+  if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
+    return LT.first * Entry->Cost;
+
+  return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, ScalarizationCostPassed);
 }
 
 int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                                      ArrayRef<Value *> Args, FastMathFlags FMF) {
-  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF);
+                                      ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
+  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
 }
 
 int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
```
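These hunks thread an optional `const Instruction *I` (plus a precomputed scalarization cost and vectorization factor) through the cast, compare/select, intrinsic, and memory cost hooks, and add `BITREVERSE` costs, including scalar entries in two new tables (`X64CostTbl` for 64-bit targets, `X86CostTbl` for both) probed after the SSE tiers. The recurring query shape is: legalize the type, try the newest ISA's table first, fall through to older tiers, and multiply by the legalization split factor (`LT.first`). A self-contained sketch of that shape follows; `HasAVX512BW`, `legalize()`, and all costs are illustrative stand-ins for the `Subtarget` query and `TLI->getTypeLegalizationCost()`, not the real LLVM API.

```cpp
// Sketch of the tiered cost query: legalize, probe newest table, fall through,
// and scale by the split factor. All names and numbers are stand-ins.
#include <cstdio>
#include <utility>

enum SimpleVT { v8i16, v16i16, v32i16 };

struct CostTblEntry { SimpleVT Type; int Cost; };

// Pretend legalization: with AVX-512BW every width here is legal; otherwise
// wide i16 vectors split into 128-bit v8i16 pieces (v32i16 -> 4 x v8i16).
std::pair<int, SimpleVT> legalize(SimpleVT VT, bool HasAVX512BW) {
  if (HasAVX512BW || VT == v8i16)
    return {1, VT};
  return {VT == v16i16 ? 2 : 4, v8i16};
}

int getShiftCost(SimpleVT VT, bool HasAVX512BW) {
  static const CostTblEntry AVX512BWTbl[] = {
      {v8i16, 1}, {v16i16, 1}, {v32i16, 1}}; // one vpsravw-like instruction
  static const CostTblEntry SSE2Tbl[] = {{v8i16, 32}}; // compare sequence

  auto [NumParts, MTy] = legalize(VT, HasAVX512BW);
  if (HasAVX512BW)
    for (const CostTblEntry &E : AVX512BWTbl)
      if (E.Type == MTy)
        return NumParts * E.Cost;
  for (const CostTblEntry &E : SSE2Tbl) // older-ISA fallback tier
    if (E.Type == MTy)
      return NumParts * E.Cost;
  return -1; // the real code defers to BaseT instead
}

int main() {
  std::printf("v32i16 SRA with AVX-512BW: %d\n", getShiftCost(v32i16, true));  // 1
  std::printf("v32i16 SRA with SSE2 only: %d\n", getShiftCost(v32i16, false)); // 128
  return 0;
}
```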
```diff
@@ -1562,22 +1616,8 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
 }
 
-int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
-  assert (Ty->isVectorTy() && "Can only scalarize vectors");
-  int Cost = 0;
-
-  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
-    if (Insert)
-      Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
-    if (Extract)
-      Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
-  }
-
-  return Cost;
-}
-
 int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                unsigned AddressSpace) {
+                                unsigned AddressSpace, const Instruction *I) {
   // Handle non-power-of-two vectors such as <3 x float>
   if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
     unsigned NumElem = VTy->getVectorNumElements();
@@ -2132,7 +2172,7 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() {
   // TODO: We expect this to be beneficial regardless of arch,
   // but there are currently some unexplained performance artifacts on Atom.
   // As a temporary solution, disable on Atom.
-  return !(ST->isAtom() || ST->isSLM());
+  return !(ST->isAtom());
 }
 
 // Get estimation for interleaved load/store operations and strided load.
```
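The final hunks delete the X86 override of `getScalarizationOverhead`, so calls now resolve to the shared base-class implementation, and stop singling out Silvermont in `enableInterleavedAccessVectorization` (only `isAtom()` still disables it). For reference, here is a standalone sketch of the computation the deleted override performed: the cost of scalarizing a vector is the per-element sum of insert and/or extract costs. `vectorInstrCost` is a hypothetical flat stand-in; the real code queries `getVectorInstrCost` for each element index.

```cpp
// Standalone sketch of the removed getScalarizationOverhead logic.
#include <cassert>

enum VecOp { InsertElement, ExtractElement };

int vectorInstrCost(VecOp /*Op*/, int /*Index*/) { return 1; } // flat stand-in

int scalarizationOverhead(int NumElts, bool Insert, bool Extract) {
  int Cost = 0;
  for (int i = 0; i < NumElts; ++i) {
    if (Insert)
      Cost += vectorInstrCost(InsertElement, i);  // build vector element i
    if (Extract)
      Cost += vectorInstrCost(ExtractElement, i); // read vector element i
  }
  return Cost;
}

int main() {
  // Fully scalarizing a 4-element vector: 4 inserts + 4 extracts.
  assert(scalarizationOverhead(4, true, true) == 8);
  return 0;
}
```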