diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-29 00:56:15 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-29 00:56:15 +0000 |
| commit | fe4fed2e4d17945c38474cf0746792d04bf84b7d (patch) | |
| tree | f82cc30abef889351b2dbe8d8aa2874056dbebbd /contrib/llvm/lib/Target/X86 | |
| parent | bbd32193a0463b1c7383443a45b774a2fe4d3430 (diff) | |
| parent | 55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff) | |
Notes
Diffstat (limited to 'contrib/llvm/lib/Target/X86')
| -rw-r--r-- | contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 5 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86.td | 3 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86ISelLowering.cpp | 164 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86ISelLowering.h | 4 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86InstrAVX512.td | 26 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86InstrFPStack.td | 6 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86InstrInfo.td | 9 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86InstrSSE.td | 23 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/X86WinEHState.cpp | 6 |
9 files changed, 129 insertions, 117 deletions
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 87c65347e334..f1ce430f3323 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2377,10 +2377,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Flags |= Prefix; Name = Parser.getTok().getString(); Parser.Lex(); // eat the prefix - // Hack: we could have something like + // Hack: we could have something like "rep # some comment" or // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl" while (Name.startswith(";") || Name.startswith("\n") || - Name.startswith("\t") || Name.startswith("/")) { + Name.startswith("#") || Name.startswith("\t") || + Name.startswith("/")) { Name = Parser.getTok().getString(); Parser.Lex(); // go to next prefix or instr } diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index 7e7c35569093..ba998467b799 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -739,7 +739,8 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [ FeatureVNNI, FeatureVPCLMULQDQ, FeatureVPOPCNTDQ, - FeatureGFNI + FeatureGFNI, + FeatureCLWB ]>; class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel, diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 5ac5d0348f8a..9edd799779c7 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1310,8 +1310,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - - setOperationAction(ISD::MUL, MVT::v8i64, Legal); } if (Subtarget.hasCDI()) { @@ -1388,8 +1386,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, VT, Legal); setOperationAction(ISD::FP_TO_SINT, VT, Legal); setOperationAction(ISD::FP_TO_UINT, VT, Legal); - - setOperationAction(ISD::MUL, VT, Legal); } } @@ -7108,8 +7104,8 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant(Immediate, dl, VT); } // Lower BUILD_VECTOR operation for v8i1 and v16i1 types. -SDValue -X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { +static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); assert((VT.getVectorElementType() == MVT::i1) && @@ -7131,8 +7127,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32)); // We have to manually lower both halves so getNode doesn't try to // reassemble the build_vector. - Lower = LowerBUILD_VECTORvXi1(Lower, DAG); - Upper = LowerBUILD_VECTORvXi1(Upper, DAG); + Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget); + Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget); return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper); } SDValue Imm = ConvertI1VectorToInteger(Op, DAG); @@ -7881,7 +7877,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // Generate vectors for predicate vectors. if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) - return LowerBUILD_VECTORvXi1(Op, DAG); + return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget); if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget)) return VectorConstant; @@ -15543,7 +15539,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SrcVT.isVector()) { if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) { return DAG.getNode(X86ISD::CVTSI2P, dl, VT, @@ -15551,9 +15546,15 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, DAG.getUNDEF(SrcVT))); } if (SrcVT.getVectorElementType() == MVT::i1) { - if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT)) - return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src)); + if (SrcVT == MVT::v2i1) { + // For v2i1, we need to widen to v4i1 first. + assert(VT == MVT::v2f64 && "Unexpected type"); + Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src, + DAG.getUNDEF(MVT::v2i1)); + return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(), + DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src)); + } + MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src)); @@ -15653,8 +15654,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, } /// 64-bit unsigned integer to double expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // This algorithm is not obvious. Here it is what we're trying to output: /* movq %rax, %xmm0 @@ -15674,7 +15675,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // Build some magic constants. static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; Constant *C0 = ConstantDataVector::get(*Context, CV0); - auto PtrVT = getPointerTy(DAG.getDataLayout()); + auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); SmallVector<Constant*,2> CV1; @@ -15721,8 +15722,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, } /// 32-bit unsigned integer to float expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, - SelectionDAG &DAG) const { +static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDLoc dl(Op); // FP constant to bias correct the final result. SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, @@ -15755,16 +15756,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. - MVT DestVT = Op.getSimpleValueType(); - - if (DestVT.bitsLT(MVT::f64)) - return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, - DAG.getIntPtrConstant(0, dl)); - if (DestVT.bitsGT(MVT::f64)) - return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); - - // Handle final rounding. - return Sub; + return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType()); } static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, @@ -15896,16 +15888,22 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); } -SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, - SelectionDAG &DAG) const { +static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDValue N0 = Op.getOperand(0); MVT SrcVT = N0.getSimpleValueType(); SDLoc dl(Op); if (SrcVT.getVectorElementType() == MVT::i1) { - if (SrcVT == MVT::v2i1) - return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0)); + if (SrcVT == MVT::v2i1) { + // For v2i1, we need to widen to v4i1 first. + assert(Op.getValueType() == MVT::v2f64 && "Unexpected type"); + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0, + DAG.getUNDEF(MVT::v2i1)); + return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64, + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0)); + } + MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements()); return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0)); @@ -15930,7 +15928,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Op.getSimpleValueType().isVector()) - return lowerUINT_TO_FP_vec(Op, DAG); + return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); MVT SrcVT = N0.getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); @@ -15943,9 +15941,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, } if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) - return LowerUINT_TO_FP_i64(Op, DAG); + return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); if (SrcVT == MVT::i32 && X86ScalarSSEf64) - return LowerUINT_TO_FP_i32(Op, DAG); + return LowerUINT_TO_FP_i32(Op, DAG, Subtarget); if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32) return SDValue(); @@ -16283,7 +16281,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op, // Truncate if we had to extend i16/i8 above. if (VT != ExtVT) { WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); - SelectedVal = DAG.getNode(X86ISD::VTRUNC, DL, WideVT, SelectedVal); + SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal); } // Extract back to 128/256-bit if we widened. @@ -18428,7 +18426,7 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, // Truncate if we had to extend i16/i8 above. if (VT != ExtVT) { WideVT = MVT::getVectorVT(VTElt, NumElts); - V = DAG.getNode(X86ISD::VTRUNC, dl, WideVT, V); + V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V); } // Extract back to 128/256-bit if we widened. @@ -18681,6 +18679,14 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); + if (Subtarget.hasVLX()) { + // Extract to v4i1/v2i1. + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load, + DAG.getIntPtrConstant(0, dl)); + // Finally, do a normal sign-extend to the desired register. + return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract); + } + MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8); SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load); @@ -18700,22 +18706,25 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, if (NumElts <= 8) { // A subset, assume that we have only AVX-512F - unsigned NumBitsToLoad = 8; - MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad); - SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(), + SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getMemOperand()); // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); - MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad); - SDValue BitVec = DAG.getBitcast(MaskVT, Load); + SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load); if (NumElts == 8) return DAG.getNode(ExtOpcode, dl, VT, BitVec); - // we should take care to v4i1 and v2i1 + if (Subtarget.hasVLX()) { + // Extract to v4i1/v2i1. + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec, + DAG.getIntPtrConstant(0, dl)); + // Finally, do a normal sign-extend to the desired register. + return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract); + } MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8); SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec); @@ -18730,13 +18739,12 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, Ld->getBasePtr(), Ld->getMemOperand()); - SDValue BasePtrHi = - DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(2, dl, BasePtr.getValueType())); + SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl); - SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), - BasePtrHi, - Ld->getMemOperand()); + SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), BasePtrHi, + Ld->getPointerInfo().getWithOffset(2), + MinAlign(Ld->getAlignment(), 2U), + Ld->getMemOperand()->getFlags()); SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadLo.getValue(1), LoadHi.getValue(1)); @@ -22086,7 +22094,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. if (VT == MVT::v4i32) { assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() && - "Should not custom lower when pmuldq is available!"); + "Should not custom lower when pmulld is available!"); + + // If the upper 17 bits of each element are zero then we can use PMADD. + APInt Mask17 = APInt::getHighBitsSet(32, 17); + if (DAG.MaskedValueIsZero(A, Mask17) && DAG.MaskedValueIsZero(B, Mask17)) + return DAG.getNode(X86ISD::VPMADDWD, dl, VT, + DAG.getBitcast(MVT::v8i16, A), + DAG.getBitcast(MVT::v8i16, B)); // Extract the odd parts. static const int UnpackMask[] = { 1, -1, 3, -1 }; @@ -22138,6 +22153,11 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask); bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask); + // If DQI is supported we can use MULLQ, but MULUDQ is still better if the + // the high bits are known to be zero. + if (Subtarget.hasDQI() && (!AHiIsZero || !BHiIsZero)) + return Op; + // Bit cast to 32-bit vectors for MULUDQ. SDValue Alo = DAG.getBitcast(MulVT, A); SDValue Blo = DAG.getBitcast(MulVT, B); @@ -31001,8 +31021,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]); } - // The replacement was made in place; don't return anything. - return SDValue(); + // The replacement was made in place; return N so it won't be revisited. + return SDValue(N, 0); } /// If a vector select has an operand that is -1 or 0, try to simplify the @@ -32256,6 +32276,13 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, if ((NumElts % 2) != 0) return SDValue(); + // If the upper 17 bits of each element are zero then we can use PMADD. + APInt Mask17 = APInt::getHighBitsSet(32, 17); + if (VT == MVT::v4i32 && DAG.MaskedValueIsZero(N0, Mask17) && + DAG.MaskedValueIsZero(N1, Mask17)) + return DAG.getNode(X86ISD::VPMADDWD, DL, VT, DAG.getBitcast(MVT::v8i16, N0), + DAG.getBitcast(MVT::v8i16, N1)); + unsigned RegSize = 128; MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16); EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); @@ -33047,10 +33074,8 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // The right side has to be a 'trunc' or a constant vector. bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getValueType() == VT; - ConstantSDNode *RHSConstSplat = nullptr; - if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1)) - RHSConstSplat = RHSBV->getConstantSplatNode(); - if (!RHSTrunc && !RHSConstSplat) + if (!RHSTrunc && + !ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -33060,13 +33085,10 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // Set N0 and N1 to hold the inputs to the new wide operation. N0 = N0->getOperand(0); - if (RHSConstSplat) { - N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT.getVectorElementType(), - SDValue(RHSConstSplat, 0)); - N1 = DAG.getSplatBuildVector(VT, DL, N1); - } else if (RHSTrunc) { + if (RHSTrunc) N1 = N1->getOperand(0); - } + else + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1); // Generate the wide operation. SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1); @@ -34039,15 +34061,14 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl); SDValue Load2 = - DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), - std::min(16U, Alignment), Ld->getMemOperand()->getFlags()); + DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags()); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); - SDValue NewVec = DAG.getUNDEF(RegVT); - NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl); - NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl); + SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2); return DCI.CombineTo(N, NewVec, TF, true); } @@ -34453,8 +34474,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags()); SDValue Ch1 = - DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), - std::min(16U, Alignment), St->getMemOperand()->getFlags()); + DAG.getStore(St->getChain(), dl, Value1, Ptr1, + St->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } @@ -34876,7 +34898,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its // better to truncate if we have the chance. if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) && - !TLI.isOperationLegal(Opcode, SrcVT)) + !Subtarget.hasDQI()) return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); LLVM_FALLTHROUGH; case ISD::ADD: { diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h index 8464081b1b08..7708f577ba70 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h @@ -1167,7 +1167,6 @@ namespace llvm { bool isReplace) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; @@ -1183,9 +1182,6 @@ namespace llvm { SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td index 2a2286e42405..dcd84930741b 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4420,12 +4420,12 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD; defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, - SSE_INTALU_ITINS_P, HasBWI, 1>; + SSE_INTMUL_ITINS_P, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, - SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P, + SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P, HasBWI, 1>; defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, HasBWI, 1>; @@ -4454,7 +4454,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins, } } -defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, +defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P, avx512vl_i32_info, avx512vl_i64_info, X86pmuldq, HasAVX512, 1>,T8PD; defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P, @@ -8704,17 +8704,6 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src), IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>; } -// Use 512bit version to implement 128/256 bit in case NoVLX. -multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info, - X86VectorVTInfo _> { - - def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))), - (X86Info.VT (EXTRACT_SUBREG - (_.VT (!cast<Instruction>(NAME#"Zrr") - (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))), - X86Info.SubRegIdx))>; -} - multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo, string OpcodeStr, Predicate prd> { let Predicates = [prd] in @@ -8724,11 +8713,6 @@ let Predicates = [prd] in defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256; defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128; } -let Predicates = [prd, NoVLX] in { - defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>; - defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>; - } - } defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>; diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td index 7e89a4111d86..619b399ef8d8 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td @@ -141,6 +141,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP, // These instructions cannot address 80-bit memory. multiclass FPBinary<SDNode OpNode, Format fp, string asmstring, bit Forward = 1> { +let mayLoad = 1, hasSideEffects = 1 in { // ST(0) = ST(0) + [mem] def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW, @@ -177,10 +178,8 @@ def _Fp80m64: FpI_<(outs RFP80:$dst), (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))), (set RFP80:$dst, (OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>; -let mayLoad = 1 in def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), !strconcat("f", asmstring, "{s}\t$src")>; -let mayLoad = 1 in def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), !strconcat("f", asmstring, "{l}\t$src")>; // ST(0) = ST(0) + [memint] @@ -226,12 +225,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), (OpNode RFP80:$src1, (X86fild addr:$src2, i32))), (set RFP80:$dst, (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>; -let mayLoad = 1 in def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), !strconcat("fi", asmstring, "{s}\t$src")>; -let mayLoad = 1 in def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), !strconcat("fi", asmstring, "{l}\t$src")>; +} // mayLoad = 1, hasSideEffects = 1 } let Defs = [FPSW] in { diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index fdf3e73e4fcd..27c67500b26f 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -832,9 +832,11 @@ def NoVLX : Predicate<"!Subtarget->hasVLX()">; def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def PKU : Predicate<"Subtarget->hasPKU()">; -def HasVNNI : Predicate<"Subtarget->hasVNNI()">; +def HasVNNI : Predicate<"Subtarget->hasVNNI()">, + AssemblerPredicate<"FeatureVNNI", "AVX-512 VNNI ISA">; -def HasBITALG : Predicate<"Subtarget->hasBITALG()">; +def HasBITALG : Predicate<"Subtarget->hasBITALG()">, + AssemblerPredicate<"FeatureBITALG", "AVX-512 BITALG ISA">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; def HasVAES : Predicate<"Subtarget->hasVAES()">; @@ -866,7 +868,8 @@ def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">; def HasVBMI : Predicate<"Subtarget->hasVBMI()">, AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">; -def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">; +def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">, + AssemblerPredicate<"FeatureVBMI2", "AVX-512 VBMI2 ISA">; def HasIFMA : Predicate<"Subtarget->hasIFMA()">, AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">; def HasRTM : Predicate<"Subtarget->hasRTM()">; diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index b48fa1841979..cb84f9aecf79 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -3734,7 +3734,7 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), @@ -3742,8 +3742,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -6313,7 +6313,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), @@ -6321,8 +6321,8 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst @@ -6338,7 +6338,7 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), @@ -6346,8 +6346,8 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + (bitconvert (memop_frag addr:$src2)))))], + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX, NoVLX] in { @@ -6924,14 +6924,15 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp index 0472a85f50da..6d6dedc60736 100644 --- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp +++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp @@ -149,6 +149,12 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const { } bool WinEHStatePass::runOnFunction(Function &F) { + // Don't insert state stores or exception handler thunks for + // available_externally functions. The handler needs to reference the LSDA, + // which will not be emitted in this case. + if (F.hasAvailableExternallyLinkage()) + return false; + // Check the personality. Do nothing if this personality doesn't use funclets. if (!F.hasPersonalityFn()) return false; |
