Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 135 |
1 file changed, 74 insertions, 61 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0a41f35f93208..5303d7a406ad4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4753,7 +4753,7 @@ static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
                              SmallVectorImpl<int> &ScaledMask) {
   assert(0 < Scale && "Unexpected scaling factor");
   int NumElts = Mask.size();
-  ScaledMask.assign(NumElts * Scale, -1);
+  ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
 
   for (int i = 0; i != NumElts; ++i) {
     int M = Mask[i];
@@ -5848,17 +5848,39 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     return true;
   }
   case ISD::SCALAR_TO_VECTOR: {
-    // Match against a scalar_to_vector of an extract from a similar vector.
+    // Match against a scalar_to_vector of an extract from a vector,
+    // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
     SDValue N0 = N.getOperand(0);
-    if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-        N0.getOperand(0).getValueType() != VT ||
-        !isa<ConstantSDNode>(N0.getOperand(1)) ||
-        NumElts <= N0.getConstantOperandVal(1) ||
-        !N->isOnlyUserOf(N0.getNode()))
+    SDValue SrcExtract;
+
+    if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        N0.getOperand(0).getValueType() == VT) {
+      SrcExtract = N0;
+    } else if (N0.getOpcode() == ISD::AssertZext &&
+               N0.getOperand(0).getOpcode() == X86ISD::PEXTRW &&
+               cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i16) {
+      SrcExtract = N0.getOperand(0);
+      assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16);
+    } else if (N0.getOpcode() == ISD::AssertZext &&
+               N0.getOperand(0).getOpcode() == X86ISD::PEXTRB &&
+               cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i8) {
+      SrcExtract = N0.getOperand(0);
+      assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
+    }
+
+    if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) ||
+        NumElts <= SrcExtract.getConstantOperandVal(1))
       return false;
-    Ops.push_back(N0.getOperand(0));
-    Mask.push_back(N0.getConstantOperandVal(1));
-    Mask.append(NumElts - 1, SM_SentinelUndef);
+
+    SDValue SrcVec = SrcExtract.getOperand(0);
+    EVT SrcVT = SrcVec.getValueType();
+    unsigned NumSrcElts = SrcVT.getVectorNumElements();
+    unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
+
+    Ops.push_back(SrcVec);
+    Mask.push_back(SrcExtract.getConstantOperandVal(1));
+    Mask.append(NumZeros, SM_SentinelZero);
+    Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
     return true;
   }
   case X86ISD::PINSRB:
@@ -6542,12 +6564,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
     APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
     Constant *Const;
     if (VT.isFloatingPoint()) {
-      assert((ScalarSize == 32 || ScalarSize == 64) &&
-             "Unsupported floating point scalar size");
-      if (ScalarSize == 32)
-        Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat());
-      else
-        Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble());
+      if (ScalarSize == 32) {
+        Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
+      } else {
+        assert(ScalarSize == 64 && "Unsupported floating point scalar size");
+        Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
+      }
     } else
       Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
     ConstantVec.push_back(Const);
@@ -6633,11 +6655,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
         // AVX have support for 32 and 64 bit broadcast for floats only.
         // No 64bit integer in 32bit subtarget.
         MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
-        Constant *C = SplatBitSize == 32
-                          ? ConstantFP::get(Type::getFloatTy(*Ctx),
-                                            SplatValue.bitsToFloat())
-                          : ConstantFP::get(Type::getDoubleTy(*Ctx),
-                                            SplatValue.bitsToDouble());
+        // Lower the splat via APFloat directly, to avoid any conversion.
+        Constant *C =
+            SplatBitSize == 32
+                ? ConstantFP::get(*Ctx,
+                                  APFloat(APFloat::IEEEsingle(), SplatValue))
+                : ConstantFP::get(*Ctx,
+                                  APFloat(APFloat::IEEEdouble(), SplatValue));
         SDValue CP = DAG.getConstantPool(C, PVT);
         unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
 
@@ -8003,7 +8027,7 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
 static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                   ArrayRef<int> Mask,
                                   SmallVectorImpl<int> &RepeatedMask) {
-  int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
+  auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
   RepeatedMask.assign(LaneSize, -1);
   int Size = Mask.size();
   for (int i = 0; i < Size; ++i) {
@@ -16997,7 +17021,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
   SDValue Op1 = Op.getOperand(1);
   SDValue CC = Op.getOperand(2);
   MVT VT = Op.getSimpleValueType();
-  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+  ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
   bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
   SDLoc dl(Op);
 
@@ -17024,18 +17048,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     // TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is
     // available.
     SDValue Cmp;
-    unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
+    unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
     if (SSECC == 8) {
       // LLVM predicate is SETUEQ or SETONE.
       unsigned CC0, CC1;
       unsigned CombineOpc;
-      if (SetCCOpcode == ISD::SETUEQ) {
+      if (Cond == ISD::SETUEQ) {
         CC0 = 3; // UNORD
         CC1 = 0; // EQ
         CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FOR) :
                                            static_cast<unsigned>(ISD::OR);
       } else {
-        assert(SetCCOpcode == ISD::SETONE);
+        assert(Cond == ISD::SETONE);
         CC0 = 7; // ORD
         CC1 = 4; // NEQ
         CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FAND) :
@@ -17082,7 +17106,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     // 2. The original operand type has been promoted to a 256-bit vector.
     //
     // Note that condition 2. only applies for AVX targets.
-    SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode);
+    SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, Cond);
     return DAG.getZExtOrTrunc(NewOp, dl, VT);
   }
 
@@ -17122,7 +17146,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
        VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) {
     // Translate compare code to XOP PCOM compare mode.
     unsigned CmpMode = 0;
-    switch (SetCCOpcode) {
+    switch (Cond) {
     default: llvm_unreachable("Unexpected SETCC condition");
     case ISD::SETULT:
    case ISD::SETLT: CmpMode = 0x00; break;
@@ -17137,60 +17161,49 @@
     }
 
     // Are we comparing unsigned or signed integers?
-    unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode)
-                   ? X86ISD::VPCOMU : X86ISD::VPCOM;
+    unsigned Opc =
+        ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
 
     return DAG.getNode(Opc, dl, VT, Op0, Op1,
                        DAG.getConstant(CmpMode, dl, MVT::i8));
   }
 
-  // We are handling one of the integer comparisons here.  Since SSE only has
+  // We are handling one of the integer comparisons here. Since SSE only has
   // GT and EQ comparisons for integer, swapping operands and multiple
   // operations may be required for some comparisons.
-  unsigned Opc;
-  bool Swap = false, Invert = false, FlipSigns = false, MinMax = false;
-  bool Subus = false;
-
-  switch (SetCCOpcode) {
-  default: llvm_unreachable("Unexpected SETCC condition");
-  case ISD::SETNE:  Invert = true; LLVM_FALLTHROUGH;
-  case ISD::SETEQ:  Opc = X86ISD::PCMPEQ; break;
-  case ISD::SETLT:  Swap = true; LLVM_FALLTHROUGH;
-  case ISD::SETGT:  Opc = X86ISD::PCMPGT; break;
-  case ISD::SETGE:  Swap = true; LLVM_FALLTHROUGH;
-  case ISD::SETLE:  Opc = X86ISD::PCMPGT;
-                    Invert = true; break;
-  case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
-  case ISD::SETUGT: Opc = X86ISD::PCMPGT;
-                    FlipSigns = true; break;
-  case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
-  case ISD::SETULE: Opc = X86ISD::PCMPGT;
-                    FlipSigns = true; Invert = true; break;
-  }
+  unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
+                                                            : X86ISD::PCMPGT;
+  bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
+              Cond == ISD::SETGE || Cond == ISD::SETUGE;
+  bool Invert = Cond == ISD::SETNE ||
+                (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
+  bool FlipSigns = ISD::isUnsignedIntSetCC(Cond);
 
   // Special case: Use min/max operations for SETULE/SETUGE
   MVT VET = VT.getVectorElementType();
-  bool hasMinMax =
-    (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
-    || (Subtarget.hasSSE2() && (VET == MVT::i8));
-
-  if (hasMinMax) {
-    switch (SetCCOpcode) {
+  bool HasMinMax =
+      (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) ||
+      (Subtarget.hasSSE2() && (VET == MVT::i8));
+  bool MinMax = false;
+  if (HasMinMax) {
+    switch (Cond) {
     default: break;
     case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break;
    case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break;
     }
 
-    if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
+    if (MinMax)
+      Swap = Invert = FlipSigns = false;
   }
 
-  bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
-  if (!MinMax && hasSubus) {
+  bool HasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
+  bool Subus = false;
+  if (!MinMax && HasSubus) {
    // As another special case, use PSUBUS[BW] when it's profitable. E.g. for
     // Op0 u<= Op1:
     //   t = psubus Op0, Op1
     //   pcmpeq t, <0..0>
-    switch (SetCCOpcode) {
+    switch (Cond) {
     default: break;
     case ISD::SETULT: {
       // If the comparison is against a constant we can turn this into a
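
Side note on the ConstantFP hunks above (getConstantVector and lowerBuildVectorAsBroadcast): both drop bitsToFloat()/bitsToDouble() in favour of an APFloat constructed straight from the splat's raw APInt bits. The sketch below is not part of the commit; it only illustrates that construction pattern, with a hypothetical helper name (fpConstantFromBits) and LLVM headers assumed to be from roughly the same era.

// Illustrative only: build an LLVM floating-point constant whose bit pattern
// is exactly the given APInt, the way the patched code does, instead of
// round-tripping through a host float/double via bitsToFloat()/bitsToDouble().
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;

// Hypothetical helper (not an LLVM API); covers the f32/f64 cases the patch handles.
static Constant *fpConstantFromBits(LLVMContext &Ctx, const APInt &Bits) {
  assert((Bits.getBitWidth() == 32 || Bits.getBitWidth() == 64) &&
         "only 32- and 64-bit splats are expected here");
  const fltSemantics &Sem =
      Bits.getBitWidth() == 32 ? APFloat::IEEEsingle() : APFloat::IEEEdouble();
  // APFloat(Sem, Bits) adopts the raw bit pattern, so nothing is converted
  // through a host float or double on the way to the Constant.
  return ConstantFP::get(Ctx, APFloat(Sem, Bits));
}

int main() {
  LLVMContext Ctx;
  // 0x7F800001 is a signalling NaN; a host float->double round-trip may
  // quieten it, which is what the patched code avoids.
  Constant *C = fpConstantFromBits(Ctx, APInt(32, 0x7F800001u));
  C->print(outs());
  outs() << "\n";
  return 0;
}

The old form, ConstantFP::get(Type::getFloatTy(...), Val.bitsToFloat()), passes through the get(Type*, double) overload, so the value is widened to a host double at the call; building the APFloat from the bits keeps the constant's encoding exact.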
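Side note on the last LowerVSETCC hunk: the removed switch over SetCCOpcode is folded into four boolean expressions over Cond. The following self-contained check is not part of the commit; it re-states both forms over a local copy of the ten integer condition codes (the helpers mirror ISD::isTrueWhenEqual and ISD::isUnsignedIntSetCC but deliberately avoid LLVM headers) and verifies that the two formulations agree.

// Standalone sanity check: the old Opc/Swap/Invert/FlipSigns switch (with
// fall-throughs written out) against the new boolean formulas.
#include <cassert>
#include <cstdio>

enum CondCode { SETEQ, SETNE, SETLT, SETGT, SETLE, SETGE,
                SETULT, SETUGT, SETULE, SETUGE };
enum Opcode { PCMPEQ, PCMPGT };

struct Lowering { Opcode Opc; bool Swap, Invert, FlipSigns; };

// Mirrors ISD::isTrueWhenEqual for the integer predicates above.
static bool isTrueWhenEqual(CondCode C) {
  return C == SETEQ || C == SETLE || C == SETGE || C == SETULE || C == SETUGE;
}
// Mirrors ISD::isUnsignedIntSetCC.
static bool isUnsignedIntSetCC(CondCode C) {
  return C == SETULT || C == SETUGT || C == SETULE || C == SETUGE;
}

// Old form: the removed switch, with each fall-through expanded.
static Lowering oldLowering(CondCode C) {
  Lowering L{PCMPGT, false, false, false};
  switch (C) {
  case SETNE:  L = {PCMPEQ, false, true,  false}; break;
  case SETEQ:  L = {PCMPEQ, false, false, false}; break;
  case SETLT:  L = {PCMPGT, true,  false, false}; break;
  case SETGT:  L = {PCMPGT, false, false, false}; break;
  case SETGE:  L = {PCMPGT, true,  true,  false}; break;
  case SETLE:  L = {PCMPGT, false, true,  false}; break;
  case SETULT: L = {PCMPGT, true,  false, true};  break;
  case SETUGT: L = {PCMPGT, false, false, true};  break;
  case SETUGE: L = {PCMPGT, true,  true,  true};  break;
  case SETULE: L = {PCMPGT, false, true,  true};  break;
  }
  return L;
}

// New form: the boolean expressions the patch introduces.
static Lowering newLowering(CondCode C) {
  Lowering L;
  L.Opc = (C == SETEQ || C == SETNE) ? PCMPEQ : PCMPGT;
  L.Swap = C == SETLT || C == SETULT || C == SETGE || C == SETUGE;
  L.Invert = C == SETNE || (C != SETEQ && isTrueWhenEqual(C));
  L.FlipSigns = isUnsignedIntSetCC(C);
  return L;
}

int main() {
  for (int i = SETEQ; i <= SETUGE; ++i) {
    CondCode C = static_cast<CondCode>(i);
    Lowering O = oldLowering(C), N = newLowering(C);
    assert(O.Opc == N.Opc && O.Swap == N.Swap && O.Invert == N.Invert &&
           O.FlipSigns == N.FlipSigns);
  }
  std::puts("old and new lowerings agree");
  return 0;
}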