diff options
Diffstat (limited to 'lib/Target/X86/X86InstrFragmentsSIMD.td')
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 111 |
1 files changed, 21 insertions, 90 deletions
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index ebbef00c01d9..739275907978 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -153,12 +153,6 @@ def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>; def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; -def X86IntCmpMask : SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisSameAs<1, 2>, SDTCisInt<1>, - SDTCisSameNumEltsAs<0, 1>]>; -def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>; -def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>; - def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisVec<1>, SDTCisSameAs<2, 1>, @@ -177,8 +171,9 @@ def X86CmpMaskCCScalarRound : SDTCisVT<3, i8>, SDTCisVT<4, i32>]>; def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; +// Hack to make CMPM commutable in tablegen patterns for load folding. +def X86cmpm_c : SDNode<"X86ISD::CMPM", X86CmpMaskCC, [SDNPCommutative]>; def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>; -def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>; def X86cmpmsRnd : SDNode<"X86ISD::FSETCCM_RND", X86CmpMaskCCScalarRound>; @@ -211,6 +206,8 @@ def X86kshiftr : SDNode<"X86ISD::KSHIFTR", SDTCisSameAs<0, 1>, SDTCisVT<2, i8>]>>; +def X86kadd : SDNode<"X86ISD::KADD", SDTIntBinOp, [SDNPCommutative]>; + def X86vrotli : SDNode<"X86ISD::VROTLI", X86vshiftimm>; def X86vrotri : SDNode<"X86ISD::VROTRI", X86vshiftimm>; @@ -228,9 +225,9 @@ def X86vpcomu : SDNode<"X86ISD::VPCOMU", def X86vpermil2 : SDNode<"X86ISD::VPERMIL2", SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, - SDTCisSameSizeAs<0,3>, - SDTCisSameNumEltsAs<0, 3>, SDTCisFP<0>, SDTCisInt<3>, + SDTCisSameNumEltsAs<0, 3>, + SDTCisSameSizeAs<0,3>, SDTCisVT<4, i8>]>>; def X86vpperm : SDNode<"X86ISD::VPPERM", SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, @@ -240,10 +237,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; -def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>, - SDTCisSameNumEltsAs<0, 1>]>; - def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>; def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>; @@ -254,8 +247,6 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; -def X86testm : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>; -def X86testnm : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>; def X86movmsk : SDNode<"X86ISD::MOVMSK", SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>; @@ -267,14 +258,12 @@ def X86selects : SDNode<"X86ISD::SELECTS", def X86pmuludq : SDNode<"X86ISD::PMULUDQ", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, - SDTCVecEltisVT<1, i32>, - SDTCisSameSizeAs<0,1>, + SDTCisSameAs<0,1>, SDTCisSameAs<1,2>]>, [SDNPCommutative]>; def X86pmuldq : SDNode<"X86ISD::PMULDQ", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, - SDTCVecEltisVT<1, i32>, - SDTCisSameSizeAs<0,1>, + SDTCisSameAs<0,1>, SDTCisSameAs<1,2>]>, [SDNPCommutative]>; @@ -292,11 +281,13 @@ def X86insertqi : SDNode<"X86ISD::INSERTQI", def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; +def SDTShuff2OpFP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>, + SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameSizeAs<0,2>, + SDTCisFP<0>, SDTCisInt<2>, SDTCisSameNumEltsAs<0,2>, - SDTCisFP<0>, SDTCisInt<2>]>; + SDTCisSameSizeAs<0,2>]>; def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>; def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, @@ -379,15 +370,11 @@ def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>; def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>; -def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>; -def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>; +def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2OpFP>; +def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2OpFP>; -def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>; -def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>; -def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; - -def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; -def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; +def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2OpFP>; +def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2OpFP>; def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, SDTCisVec<1>, SDTCisInt<1>, @@ -427,15 +414,6 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3", SDTCisSameSizeAs<0,2>, SDTCisSameAs<0,3>]>, []>; -// Even though the index operand should be integer, we need to make it match the -// destination type so that we can pattern match the masked version where the -// index is also the passthru operand. -def X86VPermi2X : SDNode<"X86ISD::VPERMIV3", - SDTypeProfile<1, 3, [SDTCisVec<0>, - SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, - SDTCisSameAs<0,3>]>, []>; - def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; @@ -465,10 +443,6 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; -def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", - SDTypeProfile<1, 2, [SDTCisVT<0, i32>, - SDTCVecEltisVT<1, i1>, - SDTCisPtrTy<2>]>>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; @@ -507,35 +481,6 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutat def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>; def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>; -// Scalar FMA4 intrinsics which zero the non-scalar bits. -def X86Fmadd4s : SDNode<"X86ISD::FMADD4S", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fnmadd4s : SDNode<"X86ISD::FNMADD4S", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fmsub4s : SDNode<"X86ISD::FMSUB4S", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fnmsub4s : SDNode<"X86ISD::FNMSUB4S", SDTFPTernaryOp, [SDNPCommutative]>; - -// Scalar FMA intrinsics with passthru bits in operand 1. -def X86Fmadds1 : SDNode<"X86ISD::FMADDS1", SDTFPTernaryOp>; -def X86Fnmadds1 : SDNode<"X86ISD::FNMADDS1", SDTFPTernaryOp>; -def X86Fmsubs1 : SDNode<"X86ISD::FMSUBS1", SDTFPTernaryOp>; -def X86Fnmsubs1 : SDNode<"X86ISD::FNMSUBS1", SDTFPTernaryOp>; - -// Scalar FMA intrinsics with passthru bits in operand 1. -def X86FmaddRnds1 : SDNode<"X86ISD::FMADDS1_RND", SDTFmaRound>; -def X86FnmaddRnds1 : SDNode<"X86ISD::FNMADDS1_RND", SDTFmaRound>; -def X86FmsubRnds1 : SDNode<"X86ISD::FMSUBS1_RND", SDTFmaRound>; -def X86FnmsubRnds1 : SDNode<"X86ISD::FNMSUBS1_RND", SDTFmaRound>; - -def X86Fmadds3 : SDNode<"X86ISD::FMADDS3", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fnmadds3 : SDNode<"X86ISD::FNMADDS3", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fmsubs3 : SDNode<"X86ISD::FMSUBS3", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fnmsubs3 : SDNode<"X86ISD::FNMSUBS3", SDTFPTernaryOp, [SDNPCommutative]>; - -// Scalar FMA intrinsics with passthru bits in operand 3. -def X86FmaddRnds3 : SDNode<"X86ISD::FMADDS3_RND", SDTFmaRound, [SDNPCommutative]>; -def X86FnmaddRnds3 : SDNode<"X86ISD::FNMADDS3_RND", SDTFmaRound, [SDNPCommutative]>; -def X86FmsubRnds3 : SDNode<"X86ISD::FMSUBS3_RND", SDTFmaRound, [SDNPCommutative]>; -def X86FnmsubRnds3 : SDNode<"X86ISD::FNMSUBS3_RND", SDTFmaRound, [SDNPCommutative]>; - def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>; @@ -569,17 +514,6 @@ def X86RndScalesRnd : SDNode<"X86ISD::VRNDSCALES_RND", SDTFPBinOpImmRound>; def X86ReducesRnd : SDNode<"X86ISD::VREDUCES_RND", SDTFPBinOpImmRound>; def X86GetMantsRnd : SDNode<"X86ISD::VGETMANTS_RND", SDTFPBinOpImmRound>; -def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, - SDTCisVT<4, i8>]>; -def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, - SDTCisVT<2, v16i8>, SDTCisVT<3, i32>, - SDTCisVT<4, v16i8>, SDTCisVT<5, i32>, - SDTCisVT<6, i8>]>; - -def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>; -def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>; - def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1, @@ -671,8 +605,6 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND", SDTCisOpSmallerThanOp<0, 1>, SDTCisVT<2, i32>]>>; -def X86cvt2mask : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>; - // galois field arithmetic def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>; def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>; @@ -687,10 +619,10 @@ def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>; // forms. def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [], [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, - SDNPWantRoot]>; + SDNPWantRoot, SDNPWantParent]>; def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [], [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, - SDNPWantRoot]>; + SDNPWantRoot, SDNPWantParent]>; def ssmem : Operand<v4f32> { let PrintMethod = "printf32mem"; @@ -892,6 +824,7 @@ def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>; def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; def bc_v8f32 : PatFrag<(ops node:$in), (v8f32 (bitconvert node:$in))>; +def bc_v4f64 : PatFrag<(ops node:$in), (v4f64 (bitconvert node:$in))>; // 512-bit bitconvert pattern fragments def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>; @@ -924,10 +857,8 @@ def I8Imm : SDNodeXForm<imm, [{ return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N)); }]>; -def FROUND_NO_EXC : ImmLeaf<i32, [{ return Imm == 8; }]>; -def FROUND_CURRENT : ImmLeaf<i32, [{ - return Imm == X86::STATIC_ROUNDING::CUR_DIRECTION; -}]>; +def FROUND_NO_EXC : PatLeaf<(i32 8)>; +def FROUND_CURRENT : PatLeaf<(i32 4)>; // BYTE_imm - Transform bit immediates into byte immediates. def BYTE_imm : SDNodeXForm<imm, [{ |