Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 214
1 file changed, 124 insertions, 90 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9ee2234595f9..11c08292518a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -40,6 +40,7 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -79,6 +80,17 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
              " of the loop header PC will be 0)."),
     cl::Hidden);
 
+/// Call this when the user attempts to do something unsupported, like
+/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
+/// report_fatal_error, so calling code should attempt to recover without
+/// crashing.
+static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
+                             const char *Msg) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  DAG.getContext()->diagnose(
+      DiagnosticInfoUnsupported(*MF.getFunction(), Msg, dl.getDebugLoc()));
+}
+
 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                                      const X86Subtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -1381,7 +1393,7 @@
     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-    setOperationAction(ISD::VSELECT, VT, Legal);
+    setOperationAction(ISD::VSELECT, VT, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
     setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
@@ -1445,8 +1457,6 @@
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
-    setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
-    setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
     setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
     setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
     setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
@@ -1479,7 +1489,7 @@
     for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-      setOperationAction(ISD::VSELECT, VT, Legal);
+      setOperationAction(ISD::VSELECT, VT, Custom);
       setOperationAction(ISD::ABS, VT, Legal);
       setOperationAction(ISD::SRL, VT, Custom);
       setOperationAction(ISD::SHL, VT, Custom);
@@ -2207,15 +2217,17 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     // or SSE or MMX vectors.
     if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
          VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
-        (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
-      report_fatal_error("SSE register return with SSE disabled");
+        (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
+      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
+      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
+    } else if (ValVT == MVT::f64 &&
+               (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
+      // Likewise we can't return F64 values with SSE1 only. gcc does so, but
+      // llvm-gcc has never done it right and no one has noticed, so this
+      // should be OK for now.
+      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
+      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
     }
-    // Likewise we can't return F64 values with SSE1 only. gcc does so, but
-    // llvm-gcc has never done it right and no one has noticed, so this
-    // should be OK for now.
-    if (ValVT == MVT::f64 &&
-        (Subtarget.is64Bit() && !Subtarget.hasSSE2()))
-      report_fatal_error("SSE2 register return with SSE2 disabled");
 
     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
     // the RET instruction and handled by the FP Stackifier.
@@ -2528,7 +2540,8 @@ SDValue X86TargetLowering::LowerCallResult(
     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
         ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
-      report_fatal_error("SSE register return with SSE disabled");
+      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
+      VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
     }
 
     // If we prefer to use the value in xmm registers, copy it out as f80 and
@@ -3415,8 +3428,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   }
 
   if (!IsSibcall)
-    Chain = DAG.getCALLSEQ_START(
-        Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
+    Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
+                                 NumBytes - NumBytesToPush, dl);
 
   SDValue RetAddrFrIdx;
   // Load return address for tail calls.
@@ -6912,9 +6925,9 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
 
   // for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
   if (IsSplat)
-    return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx),
-                       DAG.getConstant(1, dl, VT),
-                       DAG.getConstant(0, dl, VT));
+    return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
+                         DAG.getConstant(1, dl, VT),
+                         DAG.getConstant(0, dl, VT));
 
   // insert elements one by one
   SDValue DstVec;
@@ -8386,9 +8399,9 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
                                     Subtarget, DAG, DL);
   SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
   SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
-  return DAG.getNode(ISD::VSELECT, DL, VT, VMask,
-                     DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
-                     ZeroVector);
+  return DAG.getSelect(DL, VT, VMask,
+                       DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector),
+                       ZeroVector);
 }
 
 static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
@@ -8748,8 +8761,9 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
     V1 = DAG.getBitcast(BlendVT, V1);
     V2 = DAG.getBitcast(BlendVT, V2);
     return DAG.getBitcast(
-        VT, DAG.getNode(ISD::VSELECT, DL, BlendVT,
-                        DAG.getBuildVector(BlendVT, DL, VSELECTMask), V1, V2));
+        VT,
+        DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask),
+                      V1, V2));
   }
   case MVT::v16f32:
   case MVT::v8f64:
@@ -13817,6 +13831,11 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
       ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode()))
     return SDValue();
 
+  // If this VSELECT has a vector of i1 as a mask, it will be directly matched
+  // with patterns on the mask registers on AVX-512.
+  if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1)
+    return Op;
+
   // Try to lower this to a blend-style vector shuffle. This can handle all
   // constant condition cases.
   if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
@@ -13826,10 +13845,30 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
   if (!Subtarget.hasSSE41())
     return SDValue();
 
+  SDLoc dl(Op);
+  MVT VT = Op.getSimpleValueType();
+
+  // If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
+  // into an i1 condition so that we can use the mask-based 512-bit blend
+  // instructions.
+  if (VT.getSizeInBits() == 512) {
+    SDValue Cond = Op.getOperand(0);
+    // The vNi1 condition case should be handled above as it can be trivially
+    // lowered.
+    assert(Cond.getValueType().getScalarSizeInBits() ==
+               VT.getScalarSizeInBits() &&
+           "Should have a size-matched integer condition!");
+    // Build a mask by testing the condition against itself (tests for zero).
+    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+    SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond);
+    // Now return a new VSELECT using the mask.
+    return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2));
+  }
+
   // Only some types will be legal on some subtargets. If we can emit a legal
   // VSELECT-matching blend, return Op, but if we need to expand, return
   // a null value.
-  switch (Op.getSimpleValueType().SimpleTy) {
+  switch (VT.SimpleTy) {
   default:
     // Most of the vector types have blends past SSE4.1.
     return Op;
@@ -14725,7 +14764,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
     // location.
     SDValue Chain = DAG.getEntryNode();
     SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, DL, true), DL);
+    Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
    SDValue Args[] = { Chain, Offset };
    Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
@@ -15348,8 +15387,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
   // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
   SDValue Zero = DAG.getIntPtrConstant(0, dl);
   SDValue Four = DAG.getIntPtrConstant(4, dl);
-  SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
-                               Zero, Four);
+  SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four);
   FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset);
 
   // Load the value out, extending it from f32 to f80.
@@ -15621,7 +15659,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
   SDValue Zero =
       DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT);
 
-  SDValue SelectedVal = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero);
+  SDValue SelectedVal = DAG.getSelect(DL, ExtVT, In, One, Zero);
   if (VT == ExtVT)
     return SelectedVal;
   return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal);
@@ -16713,7 +16751,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
     if (BitWidth > AndBitWidth) {
       KnownBits Known;
       DAG.computeKnownBits(Op0, Known);
-      if (Known.Zero.countLeadingOnes() < BitWidth - AndBitWidth)
+      if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
         return SDValue();
     }
     LHS = Op1;
@@ -17455,7 +17493,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
      MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
      VCmp = DAG.getBitcast(VCmpVT, VCmp);
 
-      SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2);
+      SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2);
 
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VSel,
                         DAG.getIntPtrConstant(0, DL));
@@ -17483,9 +17521,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
    else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
      Op2Scalar = Op2.getOperand(0);
    if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
-      SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
-                                      Op1Scalar.getValueType(),
-                                      Cond, Op1Scalar, Op2Scalar);
+      SDValue newSelect = DAG.getSelect(DL, Op1Scalar.getValueType(), Cond,
+                                        Op1Scalar, Op2Scalar);
      if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
        return DAG.getBitcast(VT, newSelect);
      SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
@@ -17500,8 +17537,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
                      DAG.getUNDEF(MVT::v8i1), Op1, zeroConst);
    Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
                      DAG.getUNDEF(MVT::v8i1), Op2, zeroConst);
-    SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::v8i1,
-                                    Cond, Op1, Op2);
+    SDValue newSelect = DAG.getSelect(DL, MVT::v8i1, Cond, Op1, Op2);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
  }
@@ -17770,7 +17806,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
  } else {
    SDValue NegOne = getOnesVector(ExtVT, DAG, dl);
    SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
-    V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
+    V = DAG.getSelect(dl, ExtVT, In, NegOne, Zero);
    if (ExtVT == VT)
      return V;
  }
@@ -18572,7 +18608,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   // Chain the dynamic stack allocation so that it doesn't modify the stack
   // pointer when other instructions are using the stack.
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
+  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
 
   bool Is64Bit = Subtarget.is64Bit();
   MVT SPTy = getPointerTy(DAG.getDataLayout());
@@ -19021,8 +19057,10 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
                                     SDValue PreservedSrc,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
-  if (isAllOnesConstant(Mask))
-    return Op;
+
+  if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
+    if (MaskConst->getZExtValue() & 0x1)
+      return Op;
 
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
@@ -19081,7 +19119,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
   // registration, or the .set_setframe offset.
   MCSymbol *OffsetSym =
       MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
-          GlobalValue::getRealLinkageName(Fn->getName()));
+          GlobalValue::dropLLVMManglingEscape(Fn->getName()));
   SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
   SDValue ParentFrameOffset =
       DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);
@@ -19683,12 +19721,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
                         DAG.getIntPtrConstant(0, dl));
      return DAG.getBitcast(Op.getValueType(), Res);
    }
-    case CONVERT_MASK_TO_VEC: {
-      SDValue Mask = Op.getOperand(1);
-      MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
-      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
-      return DAG.getNode(IntrData->Opc0, dl, VT, VMask);
-    }
    case BRCST_SUBVEC_TO_VEC: {
      SDValue Src = Op.getOperand(1);
      SDValue Passthru = Op.getOperand(2);
@@ -19932,7 +19964,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
    SDValue Op1 = Op.getOperand(1);
    auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal());
    MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol(
-        GlobalValue::getRealLinkageName(Fn->getName()));
+        GlobalValue::dropLLVMManglingEscape(Fn->getName()));
 
    // Generate a simple absolute symbol reference. This intrinsic is only
    // supported on 32-bit Windows, which isn't PIC.
@@ -21741,6 +21773,14 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
      MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
      SDValue Ex = DAG.getBitcast(ExVT, R);
 
+      // ashr(R, 63) === cmp_slt(R, 0)
+      if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
+        assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
+               "Unsupported PCMPGT op");
+        return DAG.getNode(X86ISD::PCMPGT, dl, VT,
+                           getZeroVector(VT, Subtarget, DAG, dl), R);
+      }
+
      if (ShiftAmt >= 32) {
        // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32.
        SDValue Upper =
@@ -21839,10 +21879,19 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
  }
 
  // Special case in 32-bit mode, where i64 is expanded into high and low parts.
+  // TODO: Replace constant extraction with getTargetConstantBitsFromNode.
  if (!Subtarget.is64Bit() && !Subtarget.hasXOP() &&
      (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
       (Subtarget.hasAVX512() && VT == MVT::v8i64))) {
 
+    // AVX1 targets may be extracting a 128-bit vector from a 256-bit constant.
+    unsigned SubVectorScale = 1;
+    if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+      SubVectorScale =
+          Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits();
+      Amt = Amt.getOperand(0);
+    }
+
    // Peek through any splat that was introduced for i64 shift vectorization.
    int SplatIndex = -1;
    if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
@@ -21859,7 +21908,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
      Amt = Amt.getOperand(0);
 
    unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
-                     VT.getVectorNumElements();
+                     (SubVectorScale * VT.getVectorNumElements());
    unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
    uint64_t ShiftAmt = 0;
    unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
@@ -22233,23 +22282,21 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
      V1 = DAG.getBitcast(VT, V1);
      Sel = DAG.getBitcast(VT, Sel);
      Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel);
-      return DAG.getBitcast(SelVT,
-                            DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1));
+      return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
    } else if (Subtarget.hasSSE41()) {
      // On SSE41 targets we make use of the fact that VSELECT lowers
      // to PBLENDVB which selects bytes based just on the sign bit.
      V0 = DAG.getBitcast(VT, V0);
      V1 = DAG.getBitcast(VT, V1);
      Sel = DAG.getBitcast(VT, Sel);
-      return DAG.getBitcast(SelVT,
-                            DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1));
+      return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
    }
    // On pre-SSE41 targets we test for the sign bit by comparing to
    // zero - a negative value will set all bits of the lanes to true
    // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering.
    SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl);
    SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel);
-    return DAG.getNode(ISD::VSELECT, dl, SelVT, C, V0, V1);
+    return DAG.getSelect(dl, SelVT, C, V0, V1);
  };
 
  // Turn 'a' into a mask suitable for VSELECT: a = a << 5;
@@ -22371,15 +22418,14 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
      V0 = DAG.getBitcast(ExtVT, V0);
      V1 = DAG.getBitcast(ExtVT, V1);
      Sel = DAG.getBitcast(ExtVT, Sel);
-      return DAG.getBitcast(
-          VT, DAG.getNode(ISD::VSELECT, dl, ExtVT, Sel, V0, V1));
+      return DAG.getBitcast(VT, DAG.getSelect(dl, ExtVT, Sel, V0, V1));
    }
    // On pre-SSE41 targets we splat the sign bit - a negative value will
    // set all bits of the lanes to true and VSELECT uses that in
    // its OR(AND(V0,C),AND(V1,~C)) lowering.
    SDValue C =
        DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT));
-    return DAG.getNode(ISD::VSELECT, dl, VT, C, V0, V1);
+    return DAG.getSelect(dl, VT, C, V0, V1);
  };
 
  // Turn 'a' into a mask suitable for VSELECT: a = a << 12;
@@ -23296,9 +23342,8 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
  SDValue Callee =
      DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
 
-  Type *RetTy = isF64
-    ? (Type*)StructType::get(ArgTy, ArgTy, nullptr)
-    : (Type*)VectorType::get(ArgTy, 4);
+  Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
+                      : (Type *)VectorType::get(ArgTy, 4);
 
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
@@ -25779,7 +25824,7 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
  // Emit CALLSEQ_START right before the instruction.
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  MachineInstrBuilder CallseqStart =
-      BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0);
+      BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
  BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
 
  // Emit CALLSEQ_END right after the instruction.
@@ -26517,7 +26562,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, BB);
-
+
  case TargetOpcode::PATCHABLE_EVENT_CALL:
    // Do nothing here, handle in xray instrumentation pass.
    return BB;
@@ -29532,7 +29577,7 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
    SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
                                  DAG.getAllOnesConstant(DL, CondVT));
    // Vselect cond, op1, op2 = Vselect not(cond), op2, op1
-    return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
+    return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
  }
 
  // To use the condition operand as a bitwise mask, it must have elements that
@@ -30015,7 +30060,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
      ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
      Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(),
                          Cond.getOperand(0), Cond.getOperand(1), NewCC);
-      return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS);
+      return DAG.getSelect(DL, VT, Cond, LHS, RHS);
    }
  }
 }
@@ -31561,20 +31606,22 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
 // (sub (xor X, M), M)
 static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
                                             const X86Subtarget &Subtarget) {
-  assert(N->getOpcode() == ISD::OR);
+  assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
 
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
 
-  if (!((VT == MVT::v2i64) || (VT == MVT::v4i64 && Subtarget.hasInt256())))
+  if (!((VT.is128BitVector() && Subtarget.hasSSE2()) ||
+        (VT.is256BitVector() && Subtarget.hasInt256())))
    return SDValue();
 
-  assert(Subtarget.hasSSE2() && "Unexpected i64 vector without SSE2!");
-
-  // Canonicalize pandn to RHS
-  if (N0.getOpcode() == X86ISD::ANDNP)
+  // Canonicalize AND to LHS.
+  if (N1.getOpcode() == ISD::AND)
    std::swap(N0, N1);
 
+  // TODO: Attempt to match against AND(XOR(-1,X),Y) as well; waiting for the
+  // ANDNP combine allows other combines to happen that prevent matching.
  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != X86ISD::ANDNP)
    return SDValue();
@@ -31596,20 +31643,10 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
  Y = peekThroughBitcasts(Y);
 
  EVT MaskVT = Mask.getValueType();
-
-  // Validate that the Mask operand is a vector sra node.
-  // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
-  // there is no psrai.b
  unsigned EltBits = MaskVT.getScalarSizeInBits();
-  unsigned SraAmt = ~0;
-  if (Mask.getOpcode() == ISD::SRA) {
-    if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1)))
-      if (auto *AmtConst = AmtBV->getConstantSplatNode())
-        SraAmt = AmtConst->getZExtValue();
-  } else if (Mask.getOpcode() == X86ISD::VSRAI)
-    SraAmt = Mask.getConstantOperandVal(1);
-  if ((SraAmt + 1) != EltBits)
+
+  // TODO: Attempt to handle floating point cases as well?
+  if (!MaskVT.isInteger() || DAG.ComputeNumSignBits(Mask) != EltBits)
    return SDValue();
 
  SDLoc DL(N);
@@ -31630,7 +31667,8 @@
  // (add (xor X, M), (and M, 1))
  // And further to:
  // (sub (xor X, M), M)
-  if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
+  if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT &&
+      DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) {
    auto IsNegV = [](SDNode *N, SDValue V) {
      return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
        ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
@@ -31642,9 +31680,6 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
      V = Y;
 
    if (V) {
-      if (EltBits != 8 && EltBits != 16 && EltBits != 32)
-        return SDValue();
-
      SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
      SDValue SubOp2 = Mask;
@@ -31661,8 +31696,8 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
      if (V == Y)
        std::swap(SubOp1, SubOp2);
 
-      return DAG.getBitcast(VT,
-                            DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2));
+      SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
+      return DAG.getBitcast(VT, Res);
    }
  }
@@ -31675,7 +31710,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
    X = DAG.getBitcast(BlendVT, X);
    Y = DAG.getBitcast(BlendVT, Y);
    Mask = DAG.getBitcast(BlendVT, Mask);
-    Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
+    Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X);
    return DAG.getBitcast(VT, Mask);
  }
@@ -33655,8 +33690,7 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
  // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands
  // are NaN, the NaN value of Op1 is the result.
-  auto SelectOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
-  return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax);
+  return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
 }
 
 /// Do target-specific dag combines on X86ISD::ANDNP nodes.
@@ -33949,7 +33983,7 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
  if (InVT == MVT::i1) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
-    return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero);
+    return DAG.getSelect(DL, VT, N0, AllOnes, Zero);
  }
  return SDValue();
 }
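
Three of the changes above benefit from brief illustration.

First, the report_fatal_error -> errorUnsupported switch in LowerReturn and LowerCallResult turns a process abort into a recoverable diagnostic delivered through LLVMContext::diagnose. A client driving the backend can observe it by installing a diagnostic handler. The sketch below assumes the LLVM 4.x-era setDiagnosticHandler(callback, cookie) API (later releases moved to a DiagnosticHandler class); the function names are illustrative, not part of this patch:

    #include "llvm/IR/DiagnosticInfo.h"
    #include "llvm/IR/DiagnosticPrinter.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Called for every diagnostic the context emits; DK_Unsupported is the
    // kind produced by DiagnosticInfoUnsupported (and thus errorUnsupported).
    static void handleDiag(const DiagnosticInfo &DI, void * /*Cookie*/) {
      if (DI.getKind() == DK_Unsupported) {
        DiagnosticPrinterRawOStream DP(errs());
        errs() << "error: ";
        DI.print(DP);   // e.g. "SSE register return with SSE disabled"
        errs() << "\n"; // compilation continues; the value is returned in FP0
      }
    }

    void installHandler(LLVMContext &Ctx) {
      Ctx.setDiagnosticHandler(handleDiag, /*DiagnosticContext=*/nullptr);
    }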
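
Second, the new ashr(R, 63) === cmp_slt(R, 0) fold in LowerScalarImmediateShift relies on the identity that an arithmetic right shift by 63 replicates the sign bit across the whole 64-bit lane, which is exactly the all-ones/all-zeros mask that PCMPGTQ(0, R) produces. A scalar sanity check (illustrative only, not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t Vals[] = {INT64_MIN, -7, 0, 42, INT64_MAX};
      for (int64_t R : Vals) {
        // Arithmetic >> replicates the sign bit (two's-complement behavior on
        // all compilers LLVM supports; C++20 mandates it).
        uint64_t Ashr = static_cast<uint64_t>(R >> 63);
        // PCMPGTQ lane result: all-ones iff 0 > R, all-zeros otherwise.
        uint64_t CmpMask = (0 > R) ? ~0ULL : 0ULL;
        assert(Ashr == CmpMask);
      }
      return 0;
    }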
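
Finally, combineLogicBlendIntoPBLENDV now accepts any mask whose lanes are provably all-ones or all-zeros, via ComputeNumSignBits(Mask) == EltBits, instead of pattern-matching an explicit sra by EltBits-1. The scalar model below (a hypothetical stand-in for the DAG query, not code from the patch) shows why the sign-bit count pins each lane to 0 or -1:

    #include <cassert>
    #include <cstdint>

    // Scalar stand-in for SelectionDAG::ComputeNumSignBits on one i16 lane:
    // counts the leading bits that are copies of the sign bit.
    static unsigned numSignBits(int16_t V) {
      unsigned N = 1;
      while (N < 16 && ((V >> (15 - N)) & 1) == ((V >> 15) & 1))
        ++N;
      return N;
    }

    int main() {
      assert(numSignBits(0) == 16);      // all-zeros lane: blend picks Y
      assert(numSignBits(-1) == 16);     // all-ones lane: blend picks X
      assert(numSignBits(0x7FFF) == 1);  // not a valid per-lane blend mask
      assert(numSignBits(-2) == 15);     // sign-extended but not all-ones
      return 0;
    }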