aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrFragmentsSIMD.td')
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td199
1 files changed, 71 insertions, 128 deletions
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 3250123e5aa6..f3f7d17d9b3c 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -59,9 +59,13 @@ def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
-def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
-def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>;
+
+def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
+
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -535,8 +539,20 @@ def X86any_Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
[(X86strict_Fmadd node:$src1, node:$src2, node:$src3),
(X86Fmadd node:$src1, node:$src2, node:$src3)]>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(X86strict_Fnmadd node:$src1, node:$src2, node:$src3),
+ (X86Fnmadd node:$src1, node:$src2, node:$src3)]>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(X86strict_Fmsub node:$src1, node:$src2, node:$src3),
+ (X86Fmsub node:$src1, node:$src2, node:$src3)]>;
def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fnmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(X86strict_Fnmsub node:$src1, node:$src2, node:$src3),
+ (X86Fnmsub node:$src1, node:$src2, node:$src3)]>;
def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp, [SDNPCommutative]>;
def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp, [SDNPCommutative]>;
@@ -709,19 +725,27 @@ def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>;
+def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, i16>]>;
+def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>;
+def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps,
+ [SDNPHasChain]>;
+def X86any_cvtph2ps : PatFrags<(ops node:$src),
+ [(X86strict_cvtph2ps node:$src),
+ (X86cvtph2ps node:$src)]>;
+
+def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE", SDTcvtph2ps>;
+
+def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisVT<2, i32>]>;
+def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>;
+def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph,
+ [SDNPHasChain]>;
+def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2),
+ [(X86strict_cvtps2ph node:$src1, node:$src2),
+ (X86cvtps2ph node:$src1, node:$src2)]>;
-def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, i16>]> >;
-
-def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, i16>]> >;
-
-def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
- SDTCVecEltisVT<1, f32>,
- SDTCisVT<2, i32>]> >;
def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
@@ -741,7 +765,9 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
// cvt fp to bfloat16
def X86cvtne2ps2bf16 : SDNode<"X86ISD::CVTNE2PS2BF16",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisSameSizeAs<0,1>,
SDTCisSameAs<1,2>]>>;
def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
@@ -768,23 +794,6 @@ def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store
]>;
//===----------------------------------------------------------------------===//
-// SSE Complex Patterns
-//===----------------------------------------------------------------------===//
-
-// These are 'extloads' from a scalar to the low element of a vector, zeroing
-// the top elements. These are used for the SSE 'ss' and 'sd' instruction
-// forms.
-def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
- SDNPWantRoot, SDNPWantParent]>;
-def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
- SDNPWantRoot, SDNPWantParent]>;
-
-def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
-def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
-
-//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//
@@ -895,89 +904,6 @@ def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
-def X86masked_gather : SDNode<"X86ISD::MGATHER",
- SDTypeProfile<2, 3, [SDTCisVec<0>,
- SDTCisVec<1>, SDTCisInt<1>,
- SDTCisSameAs<0, 2>,
- SDTCisSameAs<1, 3>,
- SDTCisPtrTy<4>]>,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
-def X86masked_scatter : SDNode<"X86ISD::MSCATTER",
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisSameAs<0, 2>,
- SDTCVecEltisVT<0, i1>,
- SDTCisPtrTy<3>]>,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
-def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v4i32;
-}]>;
-
-def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v8i32;
-}]>;
-
-def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v2i64;
-}]>;
-def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v4i64;
-}]>;
-def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v8i64;
-}]>;
-def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
- X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
- return Mgt->getIndex().getValueType() == MVT::v16i32;
-}]>;
-
-def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v2i64;
-}]>;
-
-def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v4i32;
-}]>;
-
-def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v4i64;
-}]>;
-
-def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v8i32;
-}]>;
-
-def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v8i64;
-}]>;
-def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
- X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
- return Sc->getIndex().getValueType() == MVT::v16i32;
-}]>;
-
// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
@@ -1037,6 +963,23 @@ def X86VBroadcastld64 : PatFrag<(ops node:$src),
return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
}]>;
+// Scalar SSE intrinsic fragments to match several different types of loads.
+// Used by scalar SSE intrinsic instructions which have 128 bit types, but
+// only load a single element.
+// FIXME: We should add more canolicalizing in DAGCombine. Particulary removing
+// the simple_load case.
+def sse_load_f32 : PatFrags<(ops node:$ptr),
+ [(v4f32 (simple_load node:$ptr)),
+ (v4f32 (X86vzload32 node:$ptr)),
+ (v4f32 (scalar_to_vector (loadf32 node:$ptr)))]>;
+def sse_load_f64 : PatFrags<(ops node:$ptr),
+ [(v2f64 (simple_load node:$ptr)),
+ (v2f64 (X86vzload64 node:$ptr)),
+ (v2f64 (scalar_to_vector (loadf64 node:$ptr)))]>;
+
+def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
+def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
+
def fp32imm0 : PatLeaf<(f32 fpimm), [{
return N->isExactlyValue(+0.0);
@@ -1185,60 +1128,60 @@ def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore,
def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncSStore node:$val, node:$ptr), [{
- return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncUSStore node:$val, node:$ptr), [{
- return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncSStore node:$val, node:$ptr), [{
- return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncUSStore node:$val, node:$ptr), [{
- return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncSStore node:$val, node:$ptr), [{
- return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr),
(X86TruncUSStore node:$val, node:$ptr), [{
- return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
- return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;