Diffstat (limited to 'llvm/lib/Target/X86/X86InstrFragmentsSIMD.td')
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td  199
1 file changed, 71 insertions, 128 deletions
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 3250123e5aa6..f3f7d17d9b3c 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -59,9 +59,13 @@ def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
 def X86fhsub    : SDNode<"X86ISD::FHSUB",  SDTFPBinOp>;
 def X86hadd     : SDNode<"X86ISD::HADD",   SDTIntBinOp>;
 def X86hsub     : SDNode<"X86ISD::HSUB",   SDTIntBinOp>;
-def X86comi     : SDNode<"X86ISD::COMI",   SDTX86CmpTest>;
-def X86ucomi    : SDNode<"X86ISD::UCOMI",  SDTX86CmpTest>;
-def X86cmps     : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
+def X86comi     : SDNode<"X86ISD::COMI",   SDTX86FCmp>;
+def X86ucomi    : SDNode<"X86ISD::UCOMI",  SDTX86FCmp>;
+
+def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
+                                      SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
+
 def X86pshufb   : SDNode<"X86ISD::PSHUFB",
                  SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
                                       SDTCisSameAs<0,2>]>>;
@@ -535,8 +539,20 @@ def X86any_Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
                             [(X86strict_Fmadd node:$src1, node:$src2, node:$src3),
                              (X86Fmadd node:$src1, node:$src2, node:$src3)]>;
 def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+                             [(X86strict_Fnmadd node:$src1, node:$src2, node:$src3),
+                              (X86Fnmadd node:$src1, node:$src2, node:$src3)]>;
 def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+                            [(X86strict_Fmsub node:$src1, node:$src2, node:$src3),
+                             (X86Fmsub node:$src1, node:$src2, node:$src3)]>;
 def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fnmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+                             [(X86strict_Fnmsub node:$src1, node:$src2, node:$src3),
+                              (X86Fnmsub node:$src1, node:$src2, node:$src3)]>;
 def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp, [SDNPCommutative]>;
 def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp, [SDNPCommutative]>;
 
@@ -709,19 +725,27 @@ def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>;
 def X86mcvttp2si  : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>;
 def X86mcvttp2ui  : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>;
 
+def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
+                                       SDTCVecEltisVT<1, i16>]>;
+def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>;
+def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps,
+                                [SDNPHasChain]>;
+def X86any_cvtph2ps : PatFrags<(ops node:$src),
+                               [(X86strict_cvtph2ps node:$src),
+                                (X86cvtph2ps node:$src)]>;
+
+def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE", SDTcvtph2ps>;
+
+def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
+                                       SDTCVecEltisVT<1, f32>,
+                                       SDTCisVT<2, i32>]>;
+def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>;
+def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph,
+                                [SDNPHasChain]>;
+def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2),
+                               [(X86strict_cvtps2ph node:$src1, node:$src2),
+                                (X86cvtps2ph node:$src1, node:$src2)]>;
: SDNode<"X86ISD::CVTPH2PS", - SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, - SDTCVecEltisVT<1, i16>]> >; - -def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE", - SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, - SDTCVecEltisVT<1, i16>]> >; - -def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", - SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>, - SDTCVecEltisVT<1, f32>, - SDTCisVT<2, i32>]> >; def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH", SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>, SDTCVecEltisVT<1, f32>, @@ -741,7 +765,9 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND", // cvt fp to bfloat16 def X86cvtne2ps2bf16 : SDNode<"X86ISD::CVTNE2PS2BF16", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>, + SDTCVecEltisVT<1, f32>, + SDTCisSameSizeAs<0,1>, SDTCisSameAs<1,2>]>>; def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>, @@ -768,23 +794,6 @@ def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store ]>; //===----------------------------------------------------------------------===// -// SSE Complex Patterns -//===----------------------------------------------------------------------===// - -// These are 'extloads' from a scalar to the low element of a vector, zeroing -// the top elements. These are used for the SSE 'ss' and 'sd' instruction -// forms. -def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [], - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, - SDNPWantRoot, SDNPWantParent]>; -def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [], - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, - SDNPWantRoot, SDNPWantParent]>; - -def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>; -def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>; - -//===----------------------------------------------------------------------===// // SSE pattern fragments //===----------------------------------------------------------------------===// @@ -895,89 +904,6 @@ def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>; def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; -def X86masked_gather : SDNode<"X86ISD::MGATHER", - SDTypeProfile<2, 3, [SDTCisVec<0>, - SDTCisVec<1>, SDTCisInt<1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<1, 3>, - SDTCisPtrTy<4>]>, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; - -def X86masked_scatter : SDNode<"X86ISD::MSCATTER", - SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisSameAs<0, 2>, - SDTCVecEltisVT<0, i1>, - SDTCisPtrTy<3>]>, - [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - -def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (X86masked_gather node:$src1, node:$src2, node:$src3) , [{ - X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N); - return Mgt->getIndex().getValueType() == MVT::v4i32; -}]>; - -def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (X86masked_gather node:$src1, node:$src2, node:$src3) , [{ - X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N); - return Mgt->getIndex().getValueType() == MVT::v8i32; -}]>; - -def mgatherv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (X86masked_gather node:$src1, node:$src2, node:$src3) , [{ - X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N); - return Mgt->getIndex().getValueType() == MVT::v2i64; -}]>; -def mgatherv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (X86masked_gather node:$src1, node:$src2, 
-  X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v4i64;
-}]>;
-def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                           (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
-  X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v8i64;
-}]>;
-def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                            (X86masked_gather node:$src1, node:$src2, node:$src3) , [{
-  X86MaskedGatherSDNode *Mgt = cast<X86MaskedGatherSDNode>(N);
-  return Mgt->getIndex().getValueType() == MVT::v16i32;
-}]>;
-
-def mscatterv2i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
-  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
-  return Sc->getIndex().getValueType() == MVT::v2i64;
-}]>;
-
-def mscatterv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
-  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
-  return Sc->getIndex().getValueType() == MVT::v4i32;
-}]>;
-
-def mscatterv4i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
-  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
-  return Sc->getIndex().getValueType() == MVT::v4i64;
-}]>;
-
-def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
-  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
-  return Sc->getIndex().getValueType() == MVT::v8i32;
-}]>;
-
-def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                            (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
-  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
-  return Sc->getIndex().getValueType() == MVT::v8i64;
-}]>;
-def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                             (X86masked_scatter node:$src1, node:$src2, node:$src3) , [{
-  X86MaskedScatterSDNode *Sc = cast<X86MaskedScatterSDNode>(N);
-  return Sc->getIndex().getValueType() == MVT::v16i32;
-}]>;
-
 // 128-bit bitconvert pattern fragments
 def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
 def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
@@ -1037,6 +963,23 @@ def X86VBroadcastld64 : PatFrag<(ops node:$src),
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 8;
 }]>;
 
+// Scalar SSE intrinsic fragments to match several different types of loads.
+// Used by scalar SSE intrinsic instructions which have 128 bit types, but
+// only load a single element.
+// FIXME: We should add more canonicalizing in DAGCombine. Particularly removing
+// the simple_load case.
+def sse_load_f32 : PatFrags<(ops node:$ptr),
+                            [(v4f32 (simple_load node:$ptr)),
+                             (v4f32 (X86vzload32 node:$ptr)),
+                             (v4f32 (scalar_to_vector (loadf32 node:$ptr)))]>;
+def sse_load_f64 : PatFrags<(ops node:$ptr),
+                            [(v2f64 (simple_load node:$ptr)),
+                             (v2f64 (X86vzload64 node:$ptr)),
+                             (v2f64 (scalar_to_vector (loadf64 node:$ptr)))]>;
+
+def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
+def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
+
 def fp32imm0 : PatLeaf<(f32 fpimm), [{
   return N->isExactlyValue(+0.0);
 }]>;
@@ -1185,60 +1128,60 @@ def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore,
 
 def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
                                (X86TruncSStore node:$val, node:$ptr), [{
-  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
 }]>;
 
 def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr),
                                 (X86TruncUSStore node:$val, node:$ptr), [{
-  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
 }]>;
 
 def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr),
                                 (X86TruncSStore node:$val, node:$ptr), [{
-  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
 }]>;
 
 def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr),
                                  (X86TruncUSStore node:$val, node:$ptr), [{
-  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
 }]>;
 
 def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr),
                                 (X86TruncSStore node:$val, node:$ptr), [{
-  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
 def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr),
                                  (X86TruncUSStore node:$val, node:$ptr), [{
-  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
 def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                       (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
 }]>;
 
 def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                        (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
 }]>;
 
 def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                        (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
 }]>;
 
 def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                         (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
 }]>;
 
 def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                        (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
 def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
                                         (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
-  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;