summary | refs | log | tree | commit | diff
path: root/lib/Target/X86/X86InstrFragmentsSIMD.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86InstrFragmentsSIMD.td')
-rw-r--r-- lib/Target/X86/X86InstrFragmentsSIMD.td 111
1 files changed, 21 insertions, 90 deletions
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index ebbef00c01d9..739275907978 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -153,12 +153,6 @@ def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
-def X86IntCmpMask : SDTypeProfile<1, 2,
- [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisSameAs<1, 2>, SDTCisInt<1>,
- SDTCisSameNumEltsAs<0, 1>]>;
-def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
-def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
-
def X86CmpMaskCC :
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
@@ -177,8 +171,9 @@ def X86CmpMaskCCScalarRound :
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+// Hack to make CMPM commutable in tablegen patterns for load folding.
+def X86cmpm_c : SDNode<"X86ISD::CMPM", X86CmpMaskCC, [SDNPCommutative]>;
def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
-def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>;
def X86cmpmsRnd : SDNode<"X86ISD::FSETCCM_RND", X86CmpMaskCCScalarRound>;
@@ -211,6 +206,8 @@ def X86kshiftr : SDNode<"X86ISD::KSHIFTR",
SDTCisSameAs<0, 1>,
SDTCisVT<2, i8>]>>;
+def X86kadd : SDNode<"X86ISD::KADD", SDTIntBinOp, [SDNPCommutative]>;
+
def X86vrotli : SDNode<"X86ISD::VROTLI", X86vshiftimm>;
def X86vrotri : SDNode<"X86ISD::VROTRI", X86vshiftimm>;
@@ -228,9 +225,9 @@ def X86vpcomu : SDNode<"X86ISD::VPCOMU",
def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",
SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
- SDTCisSameSizeAs<0,3>,
- SDTCisSameNumEltsAs<0, 3>,
SDTCisFP<0>, SDTCisInt<3>,
+ SDTCisSameNumEltsAs<0, 3>,
+ SDTCisSameSizeAs<0,3>,
SDTCisVT<4, i8>]>>;
def X86vpperm : SDNode<"X86ISD::VPPERM",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
@@ -240,10 +237,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
-def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>,
- SDTCisSameNumEltsAs<0, 1>]>;
-
def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
@@ -254,8 +247,6 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
-def X86testm : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>;
-def X86testnm : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>;
def X86movmsk : SDNode<"X86ISD::MOVMSK",
SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>;
@@ -267,14 +258,12 @@ def X86selects : SDNode<"X86ISD::SELECTS",
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
- SDTCVecEltisVT<1, i32>,
- SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>]>,
[SDNPCommutative]>;
def X86pmuldq : SDNode<"X86ISD::PMULDQ",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
- SDTCVecEltisVT<1, i32>,
- SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>]>,
[SDNPCommutative]>;
@@ -292,11 +281,13 @@ def X86insertqi : SDNode<"X86ISD::INSERTQI",
def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>;
+def SDTShuff2OpFP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>;
def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameSizeAs<0,2>,
+ SDTCisFP<0>, SDTCisInt<2>,
SDTCisSameNumEltsAs<0,2>,
- SDTCisFP<0>, SDTCisInt<2>]>;
+ SDTCisSameSizeAs<0,2>]>;
def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>;
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
@@ -379,15 +370,11 @@ def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
-def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
-def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2OpFP>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2OpFP>;
-def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
-def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
-def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
-
-def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
-def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2OpFP>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2OpFP>;
def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
SDTCisVec<1>, SDTCisInt<1>,
@@ -427,15 +414,6 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
SDTCisSameSizeAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
-// Even though the index operand should be integer, we need to make it match the
-// destination type so that we can pattern match the masked version where the
-// index is also the passthru operand.
-def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
- SDTypeProfile<1, 3, [SDTCisVec<0>,
- SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>, []>;
-
def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
@@ -465,10 +443,6 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
-def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
- SDTCVecEltisVT<1, i1>,
- SDTCisPtrTy<2>]>>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
@@ -507,35 +481,6 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutat
def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
-// Scalar FMA4 intrinsics which zero the non-scalar bits.
-def X86Fmadd4s : SDNode<"X86ISD::FMADD4S", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fnmadd4s : SDNode<"X86ISD::FNMADD4S", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fmsub4s : SDNode<"X86ISD::FMSUB4S", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fnmsub4s : SDNode<"X86ISD::FNMSUB4S", SDTFPTernaryOp, [SDNPCommutative]>;
-
-// Scalar FMA intrinsics with passthru bits in operand 1.
-def X86Fmadds1 : SDNode<"X86ISD::FMADDS1", SDTFPTernaryOp>;
-def X86Fnmadds1 : SDNode<"X86ISD::FNMADDS1", SDTFPTernaryOp>;
-def X86Fmsubs1 : SDNode<"X86ISD::FMSUBS1", SDTFPTernaryOp>;
-def X86Fnmsubs1 : SDNode<"X86ISD::FNMSUBS1", SDTFPTernaryOp>;
-
-// Scalar FMA intrinsics with passthru bits in operand 1.
-def X86FmaddRnds1 : SDNode<"X86ISD::FMADDS1_RND", SDTFmaRound>;
-def X86FnmaddRnds1 : SDNode<"X86ISD::FNMADDS1_RND", SDTFmaRound>;
-def X86FmsubRnds1 : SDNode<"X86ISD::FMSUBS1_RND", SDTFmaRound>;
-def X86FnmsubRnds1 : SDNode<"X86ISD::FNMSUBS1_RND", SDTFmaRound>;
-
-def X86Fmadds3 : SDNode<"X86ISD::FMADDS3", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fnmadds3 : SDNode<"X86ISD::FNMADDS3", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fmsubs3 : SDNode<"X86ISD::FMSUBS3", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fnmsubs3 : SDNode<"X86ISD::FNMSUBS3", SDTFPTernaryOp, [SDNPCommutative]>;
-
-// Scalar FMA intrinsics with passthru bits in operand 3.
-def X86FmaddRnds3 : SDNode<"X86ISD::FMADDS3_RND", SDTFmaRound, [SDNPCommutative]>;
-def X86FnmaddRnds3 : SDNode<"X86ISD::FNMADDS3_RND", SDTFmaRound, [SDNPCommutative]>;
-def X86FmsubRnds3 : SDNode<"X86ISD::FMSUBS3_RND", SDTFmaRound, [SDNPCommutative]>;
-def X86FnmsubRnds3 : SDNode<"X86ISD::FNMSUBS3_RND", SDTFmaRound, [SDNPCommutative]>;
-
def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;
@@ -569,17 +514,6 @@ def X86RndScalesRnd : SDNode<"X86ISD::VRNDSCALES_RND", SDTFPBinOpImmRound>;
def X86ReducesRnd : SDNode<"X86ISD::VREDUCES_RND", SDTFPBinOpImmRound>;
def X86GetMantsRnd : SDNode<"X86ISD::VGETMANTS_RND", SDTFPBinOpImmRound>;
-def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
- SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
- SDTCisVT<4, i8>]>;
-def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
- SDTCisVT<2, v16i8>, SDTCisVT<3, i32>,
- SDTCisVT<4, v16i8>, SDTCisVT<5, i32>,
- SDTCisVT<6, i8>]>;
-
-def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
-def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
-
def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
@@ -671,8 +605,6 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
SDTCisOpSmallerThanOp<0, 1>,
SDTCisVT<2, i32>]>>;
-def X86cvt2mask : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>;
-
// galois field arithmetic
def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
@@ -687,10 +619,10 @@ def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>;
// forms.
def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [],
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
- SDNPWantRoot]>;
+ SDNPWantRoot, SDNPWantParent]>;
def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
- SDNPWantRoot]>;
+ SDNPWantRoot, SDNPWantParent]>;
def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
@@ -892,6 +824,7 @@ def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
def bc_v8f32 : PatFrag<(ops node:$in), (v8f32 (bitconvert node:$in))>;
+def bc_v4f64 : PatFrag<(ops node:$in), (v4f64 (bitconvert node:$in))>;
// 512-bit bitconvert pattern fragments
def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>;
@@ -924,10 +857,8 @@ def I8Imm : SDNodeXForm<imm, [{
return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
}]>;
-def FROUND_NO_EXC : ImmLeaf<i32, [{ return Imm == 8; }]>;
-def FROUND_CURRENT : ImmLeaf<i32, [{
- return Imm == X86::STATIC_ROUNDING::CUR_DIRECTION;
-}]>;
+def FROUND_NO_EXC : PatLeaf<(i32 8)>;
+def FROUND_CURRENT : PatLeaf<(i32 4)>;
// BYTE_imm - Transform bit immediates into byte immediates.
def BYTE_imm : SDNodeXForm<imm, [{