Diffstat (limited to 'llvm/lib/Target/ARM/ARMInstrMVE.td')
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 2779
1 file changed, 1866 insertions(+), 913 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 4f67cd6e47cc..604291be822c 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -275,6 +275,83 @@ class mve_addr_q_shift<int shift> : MemOperand {
   let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);
 }
 
+// A family of classes wrapping up information about the vector types
+// used by MVE.
+class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
+                      bits<2> size, string suffixletter, bit unsigned> {
+  // The LLVM ValueType representing the vector, so we can use it in
+  // ISel patterns.
+  ValueType Vec = vec;
+
+  // The LLVM ValueType representing a vector with elements double the size
+  // of those in Vec, so we can use it in ISel patterns. It is up to the
+  // invoker of this class to ensure that this is a correct choice.
+  ValueType DblVec = dblvec;
+
+  // An LLVM ValueType representing a corresponding vector of
+  // predicate bits, for use in ISel patterns that handle an IR
+  // intrinsic describing the predicated form of the instruction.
+  //
+  // Usually, for a vector of N things, this will be vNi1. But for
+  // vectors of 2 values, we make an exception, and use v4i1 instead
+  // of v2i1. Rationale: MVE codegen doesn't support doing all the
+  // auxiliary operations on v2i1 (vector shuffles etc), and also,
+  // there's no MVE compare instruction that will _generate_ v2i1
+  // directly.
+  ValueType Pred = pred;
+
+  // The most common representation of the vector element size in MVE
+  // instruction encodings: a 2-bit value V representing an (8<<V)-bit
+  // vector element.
+  bits<2> Size = size;
+
+  // For vectors explicitly mentioning a signedness of integers: 0 for
+  // signed and 1 for unsigned. For anything else, undefined.
+  bit Unsigned = unsigned;
+
+  // The number of bits in a vector element, in integer form.
+  int LaneBits = !shl(8, Size);
+
+  // The suffix used in assembly language on an instruction operating
+  // on this lane if it only cares about number of bits.
+  string BitsSuffix = !if(!eq(suffixletter, "p"),
+                          !if(!eq(unsigned, 0b0), "8", "16"),
+                          !cast<string>(LaneBits));
+
+  // The suffix used on an instruction that mentions the whole type.
+  string Suffix = suffixletter ## BitsSuffix;
+
+  // The letter part of the suffix only.
+  string SuffixLetter = suffixletter;
+}
+
+// Integer vector types that don't treat signed and unsigned differently.
+def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "i", ?>;
+def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1,  0b01, "i", ?>;
+def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1,  0b10, "i", ?>;
+def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?,     v4i1,  0b11, "i", ?>;
+
+// Explicitly signed and unsigned integer vectors. They map to the
+// same set of LLVM ValueTypes as above, but are represented
+// differently in assembly and instruction encodings.
+def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "s", 0b0>;
+def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1,  0b01, "s", 0b0>;
+def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1,  0b10, "s", 0b0>;
+def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?,     v4i1,  0b11, "s", 0b0>;
+def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "u", 0b1>;
+def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1,  0b01, "u", 0b1>;
+def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1,  0b10, "u", 0b1>;
+def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?,     v4i1,  0b11, "u", 0b1>;
+
+// FP vector types.
+def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1,  0b01, "f", ?>;
+def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1,  0b10, "f", ?>;
+def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?,     v4i1,  0b11, "f", ?>;
+
+// Polynomial vector types.
+def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b11, "p", 0b0>;
+def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1,  0b11, "p", 0b1>;
+
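// Editorial worked example (annotation, not part of the patch): with the
// definitions above, the fields of MVE_v8s16 evaluate to
//   Vec = v8i16, DblVec = v4i32, Pred = v8i1, Size = 0b01, Unsigned = 0,
//   LaneBits = !shl(8, 0b01) = 16, BitsSuffix = "16", Suffix = "s16".
// A multiclass handed MVE_v8s16 can therefore write a single pattern
// against VTI.Vec and VTI.Pred and have it expand to v8i16 and v8i1,
// instead of repeating the same pattern once per element size.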
 // --------- Start of base classes for the instructions themselves
 
 class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
@@ -346,9 +423,12 @@ class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
   let Inst{19-16} = RdaDest{3-0};
 }
 
-class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4, list<dag> pattern=[]>
+class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4>
   : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, long_shift:$imm),
-                             "$RdaSrc, $imm", "$RdaDest = $RdaSrc", pattern> {
+                             "$RdaSrc, $imm", "$RdaDest = $RdaSrc",
+      [(set rGPR:$RdaDest,
+            (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
+                      (i32 rGPR:$RdaSrc), (i32 imm:$imm))))]> {
   bits<5> imm;
 
   let Inst{15} = 0b0;
@@ -364,9 +444,12 @@ def MVE_SRSHR : MVE_ScalarShiftSRegImm<"srshr", 0b10>;
 def MVE_UQSHL : MVE_ScalarShiftSRegImm<"uqshl", 0b00>;
 def MVE_URSHR : MVE_ScalarShiftSRegImm<"urshr", 0b01>;
 
-class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4, list<dag> pattern=[]>
+class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4>
   : MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, rGPR:$Rm),
-                             "$RdaSrc, $Rm", "$RdaDest = $RdaSrc", pattern> {
+                             "$RdaSrc, $Rm", "$RdaDest = $RdaSrc",
+      [(set rGPR:$RdaDest,
+            (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
+                      (i32 rGPR:$RdaSrc), (i32 rGPR:$Rm))))]> {
  bits<4> Rm;
 
   let Inst{15-12} = Rm{3-0};
@@ -487,10 +570,10 @@ class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
   let Inst{4} = 0b0;
 }
 
-class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+class MVE_VABAV<string suffix, bit U, bits<2> size>
   : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
               NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
-              pattern> {
+              []> {
   bits<4> Qm;
   bits<4> Qn;
   bits<4> Rda;
@@ -509,12 +592,35 @@ class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
   let Inst{0} = 0b1;
 }
 
-def MVE_VABAVs8  : MVE_VABAV<"s8", 0b0, 0b00>;
-def MVE_VABAVs16 : MVE_VABAV<"s16", 0b0, 0b01>;
-def MVE_VABAVs32 : MVE_VABAV<"s32", 0b0, 0b10>;
-def MVE_VABAVu8  : MVE_VABAV<"u8", 0b1, 0b00>;
-def MVE_VABAVu16 : MVE_VABAV<"u16", 0b1, 0b01>;
-def MVE_VABAVu32 : MVE_VABAV<"u32", 0b1, 0b10>;
+multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
+  def "" : MVE_VABAV<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEInt] in {
+    def : Pat<(i32 (int_arm_mve_vabav
+                        (i32 VTI.Unsigned),
+                        (i32 rGPR:$Rda_src),
+                        (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (i32 (Inst (i32 rGPR:$Rda_src),
+                         (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+    def : Pat<(i32 (int_arm_mve_vabav_predicated
+                        (i32 VTI.Unsigned),
+                        (i32 rGPR:$Rda_src),
+                        (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                        (VTI.Pred VCCR:$mask))),
+              (i32 (Inst (i32 rGPR:$Rda_src),
+                         (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                         ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+  }
+}
+
+defm MVE_VABAVs8  : MVE_VABAV_m<MVE_v16s8>;
+defm MVE_VABAVs16 : MVE_VABAV_m<MVE_v8s16>;
+defm MVE_VABAVs32 : MVE_VABAV_m<MVE_v4s32>;
+defm MVE_VABAVu8  : MVE_VABAV_m<MVE_v16u8>;
+defm MVE_VABAVu16 : MVE_VABAV_m<MVE_v8u16>;
+defm MVE_VABAVu32 : MVE_VABAV_m<MVE_v4u32>;
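// Editorial note (annotation, not part of the patch): inside a multiclass,
// TableGen's NAME expands to the name under which the defm was
// instantiated. So in
//   defm MVE_VABAVs8 : MVE_VABAV_m<MVE_v16s8>;
// the inner `def ""` defines the instruction record MVE_VABAVs8 itself,
// and `defvar Inst = !cast<Instruction>(NAME)` lets the two anonymous
// Pat records reference that instruction when lowering int_arm_mve_vabav
// and its _predicated variant. The same NAME/defvar idiom recurs in most
// of the multiclasses added by this patch.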
 
 class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
                 bit A, bit U, bits<2> size, list<dag> pattern=[]>
@@ -658,17 +764,31 @@ class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
   let Inst{0} = 0b0;
 }
 
-multiclass MVE_VMINMAXV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
-  def s8  : MVE_VMINMAXV<iname, "s8",  0b0, 0b00, 0b1, bit_7>;
-  def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b1, bit_7>;
-  def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b1, bit_7>;
-  def u8  : MVE_VMINMAXV<iname, "u8",  0b1, 0b00, 0b1, bit_7>;
-  def u16 : MVE_VMINMAXV<iname, "u16", 0b1, 0b01, 0b1, bit_7>;
-  def u32 : MVE_VMINMAXV<iname, "u32", 0b1, 0b10, 0b1, bit_7>;
+multiclass MVE_VMINMAXV_p<string iname, bit bit_17, bit bit_7,
+                          MVEVectorVTInfo VTI, Intrinsic intr> {
+  def "": MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
+                       bit_17, bit_7>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEInt] in
+  def _pat : Pat<(i32 (intr (i32 rGPR:$prev), (VTI.Vec MQPR:$vec))),
+                 (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
+}
+
+multiclass MVE_VMINMAXV_ty<string iname, bit bit_7,
+                           Intrinsic intr_s, Intrinsic intr_u> {
+  defm s8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16s8, intr_s>;
+  defm s16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8s16, intr_s>;
+  defm s32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4s32, intr_s>;
+  defm u8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16u8, intr_u>;
+  defm u16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8u16, intr_u>;
+  defm u32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4u32, intr_u>;
 }
 
-defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>;
-defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>;
+defm MVE_VMINV : MVE_VMINMAXV_ty<
+    "vminv", 0b1, int_arm_mve_minv_s, int_arm_mve_minv_u>;
+defm MVE_VMAXV : MVE_VMINMAXV_ty<
+    "vmaxv", 0b0, int_arm_mve_maxv_s, int_arm_mve_maxv_u>;
 
 let Predicates = [HasMVEInt] in {
   def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
@@ -709,10 +829,9 @@ defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
 defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
 
 class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
-                     bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
-                     list<dag> pattern=[]>
+                     bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
   : MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
-              "$RdaDest, $Qn, $Qm", cstr, pattern> {
+              "$RdaDest, $Qn, $Qm", cstr, []> {
   bits<4> RdaDest;
   bits<3> Qm;
   bits<3> Qn;
@@ -730,47 +849,88 @@ class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
   let Inst{0} = bit_0;
 }
 
-multiclass MVE_VMLAMLSDAV_A<string iname, string x, string suffix,
-                            bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
-                            list<dag> pattern=[]> {
-  def ""#x#suffix : MVE_VMLAMLSDAV<iname # x, suffix,
+multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
+                            bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> {
+  def ""#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # x, VTI.Suffix,
                                    (ins MQPR:$Qn, MQPR:$Qm), "",
-                                   sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
-  def "a"#x#suffix : MVE_VMLAMLSDAV<iname # "a" # x, suffix,
+                                   sz, bit_28, 0b0, X, bit_8, bit_0>;
+  def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # "a" # x, VTI.Suffix,
                                     (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
                                     "$RdaDest = $RdaSrc",
-                                    sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
+                                    sz, bit_28, 0b1, X, bit_8, bit_0>;
+  let Predicates = [HasMVEInt] in {
+    def : Pat<(i32 (int_arm_mve_vmldava
+                            (i32 VTI.Unsigned),
+                            (i32 bit_0) /* subtract */,
+                            (i32 X) /* exchange */,
+                            (i32 0) /* accumulator */,
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+    def : Pat<(i32 (int_arm_mve_vmldava_predicated
+                            (i32 VTI.Unsigned),
+                            (i32 bit_0) /* subtract */,
+                            (i32 X) /* exchange */,
+                            (i32 0) /* accumulator */,
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            (VTI.Pred VCCR:$mask))),
+              (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+
+    def : Pat<(i32 (int_arm_mve_vmldava
+                            (i32 VTI.Unsigned),
+                            (i32 bit_0) /* subtract */,
+                            (i32 X) /* exchange */,
+                            (i32 tGPREven:$RdaSrc),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+                            (i32 tGPREven:$RdaSrc),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+    def : Pat<(i32 (int_arm_mve_vmldava_predicated
+                            (i32 VTI.Unsigned),
+                            (i32 bit_0) /* subtract */,
+                            (i32 X) /* exchange */,
+                            (i32 tGPREven:$RdaSrc),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            (VTI.Pred VCCR:$mask))),
+              (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+                            (i32 tGPREven:$RdaSrc),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+  }
 }
 
-multiclass MVE_VMLAMLSDAV_AX<string iname, string suffix, bit sz, bit bit_28,
-                             bit bit_8, bit bit_0, list<dag> pattern=[]> {
-  defm "" : MVE_VMLAMLSDAV_A<iname, "", suffix, sz, bit_28,
-                             0b0, bit_8, bit_0, pattern>;
-  defm "" : MVE_VMLAMLSDAV_A<iname, "x", suffix, sz, bit_28,
-                             0b1, bit_8, bit_0, pattern>;
+multiclass MVE_VMLAMLSDAV_AX<string iname, MVEVectorVTInfo VTI, bit sz,
+                             bit bit_28, bit bit_8, bit bit_0> {
+  defm "" : MVE_VMLAMLSDAV_A<iname, "", VTI, sz, bit_28,
+                             0b0, bit_8, bit_0>;
+  defm "" : MVE_VMLAMLSDAV_A<iname, "x", VTI, sz, bit_28,
+                             0b1, bit_8, bit_0>;
 }
 
-multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit bit_8,
-                             list<dag> pattern=[]> {
-  defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix,
-                              sz, 0b0, bit_8, 0b0, pattern>;
-  defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix,
-                             sz, 0b1, 0b0, bit_8, 0b0, pattern>;
+multiclass MVE_VMLADAV_multi<MVEVectorVTInfo SVTI, MVEVectorVTInfo UVTI,
+                             bit sz, bit bit_8> {
+  defm "" : MVE_VMLAMLSDAV_AX<"vmladav", SVTI,
+                              sz, 0b0, bit_8, 0b0>;
  defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", UVTI,
                             sz, 0b1, 0b0, bit_8, 0b0>;
 }
 
-multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28,
-                             list<dag> pattern=[]> {
-  defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix,
-                              sz, bit_28, 0b0, 0b1, pattern>;
+multiclass MVE_VMLSDAV_multi<MVEVectorVTInfo VTI, bit sz, bit bit_28> {
+  defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", VTI,
+                              sz, bit_28, 0b0, 0b1>;
 }
 
-defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v16s8, MVE_v16u8, 0b0, 0b1>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v8s16, MVE_v8u16, 0b0, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v4s32, MVE_v4u32, 0b1, 0b0>;
 
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v16s8, 0b0, 0b1>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v8s16, 0b0, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v4s32, 0b1, 0b0>;
 
 // vmlav aliases vmladav
 foreach acc = ["", "a"] in {
@@ -932,6 +1092,16 @@ let Predicates = [HasMVEFloat] in {
             (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
   def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
             (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+  def : Pat<(v4f32 (int_arm_mve_max_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), (i32 0),
+                          (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+            (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+                          ARMVCCThen, (v4i1 VCCR:$mask),
+                          (v4f32 MQPR:$inactive)))>;
+  def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), (i32 0),
+                          (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+            (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+                          ARMVCCThen, (v8i1 VCCR:$mask),
+                          (v8f16 MQPR:$inactive)))>;
 }
 
 def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
@@ -942,6 +1112,16 @@ let Predicates = [HasMVEFloat] in {
             (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
   def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
             (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+  def : Pat<(v4f32 (int_arm_mve_min_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+                          (i32 0), (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+            (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+                          ARMVCCThen, (v4i1 VCCR:$mask),
+                          (v4f32 MQPR:$inactive)))>;
+  def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+                          (i32 0), (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+            (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+                          ARMVCCThen, (v8i1 VCCR:$mask),
+                          (v8f16 MQPR:$inactive)))>;
 }
 
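// Editorial note (annotation, not part of the patch): these patterns match
// the IR-level predicated intrinsics. Assuming the usual intrinsic-name
// mangling, IR such as
//   %r = call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(
//            <4 x float> %a, <4 x float> %b, i32 0,
//            <4 x i1> %m, <4 x float> %inactive)
// selects to MVE_VMINNMf32 with the ARMVCCThen operand, i.e. a vminnmt.f32
// executed under a VPT block, with %inactive supplying the lanes the
// predicate masks off.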
@@ -957,50 +1137,48 @@ class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
   let Inst{8} = 0b0;
   let Inst{6} = 0b1;
   let Inst{4} = bit_4;
+  let validForTailPredication = 1;
 }
 
-multiclass MVE_VMINMAX_all_sizes<string iname, bit bit_4> {
-  def s8  : MVE_VMINMAX<iname, "s8",  0b0, 0b00, bit_4>;
-  def s16 : MVE_VMINMAX<iname, "s16", 0b0, 0b01, bit_4>;
-  def s32 : MVE_VMINMAX<iname, "s32", 0b0, 0b10, bit_4>;
-  def u8  : MVE_VMINMAX<iname, "u8",  0b1, 0b00, bit_4>;
-  def u16 : MVE_VMINMAX<iname, "u16", 0b1, 0b01, bit_4>;
-  def u32 : MVE_VMINMAX<iname, "u32", 0b1, 0b10, bit_4>;
-}
+multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI,
+                         SDNode unpred_op, Intrinsic pred_int> {
+  def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>;
+  defvar Inst = !cast<Instruction>(NAME);
 
-defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>;
-defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>;
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated min/max
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+    // Predicated min/max
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+                           (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
 }
 
+multiclass MVE_VMAX<MVEVectorVTInfo VTI>
+  : MVE_VMINMAX_m<"vmax", 0b0, VTI, !if(VTI.Unsigned, umax, smax), int_arm_mve_max_predicated>;
+multiclass MVE_VMIN<MVEVectorVTInfo VTI>
+  : MVE_VMINMAX_m<"vmin", 0b1, VTI, !if(VTI.Unsigned, umin, smin), int_arm_mve_min_predicated>;
+
+defm MVE_VMINs8  : MVE_VMIN<MVE_v16s8>;
+defm MVE_VMINs16 : MVE_VMIN<MVE_v8s16>;
+defm MVE_VMINs32 : MVE_VMIN<MVE_v4s32>;
+defm MVE_VMINu8  : MVE_VMIN<MVE_v16u8>;
+defm MVE_VMINu16 : MVE_VMIN<MVE_v8u16>;
+defm MVE_VMINu32 : MVE_VMIN<MVE_v4u32>;
+
+defm MVE_VMAXs8  : MVE_VMAX<MVE_v16s8>;
+defm MVE_VMAXs16 : MVE_VMAX<MVE_v8s16>;
+defm MVE_VMAXs32 : MVE_VMAX<MVE_v4s32>;
+defm MVE_VMAXu8  : MVE_VMAX<MVE_v16u8>;
+defm MVE_VMAXu16 : MVE_VMAX<MVE_v8u16>;
+defm MVE_VMAXu32 : MVE_VMAX<MVE_v4u32>;
+
 // end of mve_comp instructions
 
 // start of mve_bit instructions
@@ -1150,53 +1328,61 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f32"] in {
             (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
 }
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-  def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
-            (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-  def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
-            (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-  def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
-            (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-  def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
-  def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-  def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
-            (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+multiclass MVE_bit_op<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated operation
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+    // Predicated operation
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (instruction
+                            (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                            ARMVCCThen, (VTI.Pred VCCR:$mask),
+                            (VTI.Vec MQPR:$inactive)))>;
+  }
+}
+
+defm : MVE_bit_op<MVE_v16i8, and, int_arm_mve_and_predicated, MVE_VAND>;
+defm : MVE_bit_op<MVE_v8i16, and, int_arm_mve_and_predicated, MVE_VAND>;
+defm : MVE_bit_op<MVE_v4i32, and, int_arm_mve_and_predicated, MVE_VAND>;
+defm : MVE_bit_op<MVE_v2i64, and, int_arm_mve_and_predicated, MVE_VAND>;
+
+defm : MVE_bit_op<MVE_v16i8, or, int_arm_mve_orr_predicated, MVE_VORR>;
+defm : MVE_bit_op<MVE_v8i16, or, int_arm_mve_orr_predicated, MVE_VORR>;
+defm : MVE_bit_op<MVE_v4i32, or, int_arm_mve_orr_predicated, MVE_VORR>;
+defm : MVE_bit_op<MVE_v2i64, or, int_arm_mve_orr_predicated, MVE_VORR>;
+
+defm : MVE_bit_op<MVE_v16i8, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+defm : MVE_bit_op<MVE_v8i16, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+defm : MVE_bit_op<MVE_v4i32, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+defm : MVE_bit_op<MVE_v2i64, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+
+multiclass MVE_bit_op_with_inv<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated operation
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (vnotq (VTI.Vec MQPR:$Qn)))),
+              (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+    // Predicated operation
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (instruction
+                            (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                            ARMVCCThen, (VTI.Pred VCCR:$mask),
+                            (VTI.Vec MQPR:$inactive)))>;
+  }
 }
 
+defm : MVE_bit_op_with_inv<MVE_v16i8, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+defm : MVE_bit_op_with_inv<MVE_v8i16, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+defm : MVE_bit_op_with_inv<MVE_v4i32, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+defm : MVE_bit_op_with_inv<MVE_v2i64, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+
+defm : MVE_bit_op_with_inv<MVE_v16i8, or, int_arm_mve_orn_predicated, MVE_VORN>;
+defm : MVE_bit_op_with_inv<MVE_v8i16, or, int_arm_mve_orn_predicated, MVE_VORN>;
+defm : MVE_bit_op_with_inv<MVE_v4i32, or, int_arm_mve_orn_predicated, MVE_VORN>;
+defm : MVE_bit_op_with_inv<MVE_v2i64, or, int_arm_mve_orn_predicated, MVE_VORN>;
+
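// Editorial expansion (annotation, not part of the patch): one
// instantiation of the multiclass, e.g.
//   defm : MVE_bit_op<MVE_v4i32, and, int_arm_mve_and_predicated, MVE_VAND>;
// produces the unpredicated pattern
//   def : Pat<(v4i32 (and (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
//             (v4i32 (MVE_VAND (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
// plus a predicated twin that adds ARMVCCThen, a v4i1 mask and the
// $inactive fallback vector. MVE_bit_op_with_inv differs only in matching
// (and x, (vnotq y)) and (or x, (vnotq y)), which is how it covers the
// vbic and vorn instructions.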
 class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
   : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
           iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
@@ -1429,8 +1615,9 @@ class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
   let Inst{3-1} = Qm{2-0};
 }
 
-class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_int<"vmul", suffix, size, pattern> {
+class MVE_VMULt1<string iname, string suffix, bits<2> size,
+                 list<dag> pattern=[]>
+  : MVE_int<iname, suffix, size, pattern> {
 
   let Inst{28} = 0b0;
   let Inst{25-23} = 0b110;
@@ -1438,22 +1625,36 @@ class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]>
   let Inst{12-8} = 0b01001;
   let Inst{4} = 0b1;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
-def MVE_VMULt1i8 : MVE_VMULt1<"i8", 0b00>;
-def MVE_VMULt1i16 : MVE_VMULt1<"i16", 0b01>;
-def MVE_VMULt1i32 : MVE_VMULt1<"i32", 0b10>;
+multiclass MVE_VMUL_m<string iname, MVEVectorVTInfo VTI,
+                      SDNode unpred_op, Intrinsic pred_int> {
+  def "" : MVE_VMULt1<iname, VTI.Suffix, VTI.Size>;
+  defvar Inst = !cast<Instruction>(NAME);
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VMULt1i8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VMULt1i16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VMULt1i32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated multiply
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated multiply
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
 }
 
-class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
+multiclass MVE_VMUL<MVEVectorVTInfo VTI>
+  : MVE_VMUL_m<"vmul", VTI, mul, int_arm_mve_mul_predicated>;
+
+defm MVE_VMULi8  : MVE_VMUL<MVE_v16i8>;
+defm MVE_VMULi16 : MVE_VMUL<MVE_v8i16>;
+defm MVE_VMULi32 : MVE_VMUL<MVE_v4i32>;
+
+class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
                    list<dag> pattern=[]>
   : MVE_int<iname, suffix, size, pattern> {
 
@@ -1465,18 +1666,40 @@ class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
   let Inst{0} = 0b0;
 }
 
-class MVE_VQDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_VQxDMULH<"vqdmulh", suffix, size, 0b0, pattern>;
-class MVE_VQRDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_VQxDMULH<"vqrdmulh", suffix, size, 0b1, pattern>;
+multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI,
+                          SDNode unpred_op, Intrinsic pred_int,
+                          bit rounding> {
+  def "" : MVE_VQxDMULH_Base<iname, VTI.Suffix, VTI.Size, rounding>;
+  defvar Inst = !cast<Instruction>(NAME);
 
-def MVE_VQDMULHi8  : MVE_VQDMULH<"s8", 0b00>;
-def MVE_VQDMULHi16 : MVE_VQDMULH<"s16", 0b01>;
-def MVE_VQDMULHi32 : MVE_VQDMULH<"s32", 0b10>;
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated multiply
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated multiply
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
+}
 
-def MVE_VQRDMULHi8  : MVE_VQRDMULH<"s8", 0b00>;
-def MVE_VQRDMULHi16 : MVE_VQRDMULH<"s16", 0b01>;
-def MVE_VQRDMULHi32 : MVE_VQRDMULH<"s32", 0b10>;
+multiclass MVE_VQxDMULH<string iname, MVEVectorVTInfo VTI, bit rounding>
+  : MVE_VQxDMULH_m<iname, VTI, !if(rounding, int_arm_mve_vqrdmulh,
+                                             int_arm_mve_vqdmulh),
+                               !if(rounding, int_arm_mve_qrdmulh_predicated,
+                                             int_arm_mve_qdmulh_predicated),
+                   rounding>;
+
+defm MVE_VQDMULHi8  : MVE_VQxDMULH<"vqdmulh", MVE_v16s8, 0b0>;
+defm MVE_VQDMULHi16 : MVE_VQxDMULH<"vqdmulh", MVE_v8s16, 0b0>;
+defm MVE_VQDMULHi32 : MVE_VQxDMULH<"vqdmulh", MVE_v4s32, 0b0>;
+
+defm MVE_VQRDMULHi8  : MVE_VQxDMULH<"vqrdmulh", MVE_v16s8, 0b1>;
+defm MVE_VQRDMULHi16 : MVE_VQxDMULH<"vqrdmulh", MVE_v8s16, 0b1>;
+defm MVE_VQRDMULHi32 : MVE_VQxDMULH<"vqrdmulh", MVE_v4s32, 0b1>;
 
 class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
                   list<dag> pattern=[]>
@@ -1491,39 +1714,40 @@ class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
   let validForTailPredication = 1;
 }
 
-class MVE_VADD<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_VADDSUB<"vadd", suffix, size, 0b0, pattern>;
-class MVE_VSUB<string suffix, bits<2> size, list<dag> pattern=[]>
-  : MVE_VADDSUB<"vsub", suffix, size, 0b1, pattern>;
+multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract,
+                         SDNode unpred_op, Intrinsic pred_int> {
+  def "" : MVE_VADDSUB<iname, VTI.Suffix, VTI.Size, subtract>;
+  defvar Inst = !cast<Instruction>(NAME);
 
-def MVE_VADDi8  : MVE_VADD<"i8", 0b00>;
-def MVE_VADDi16 : MVE_VADD<"i16", 0b01>;
-def MVE_VADDi32 : MVE_VADD<"i32", 0b10>;
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated add/subtract
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VADDi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VADDi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VADDi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+    // Predicated add/subtract
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
 }
 
-def MVE_VSUBi8  : MVE_VSUB<"i8", 0b00>;
-def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01>;
-def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10>;
+multiclass MVE_VADD<MVEVectorVTInfo VTI>
+  : MVE_VADDSUB_m<"vadd", VTI, 0b0, add, int_arm_mve_add_predicated>;
+multiclass MVE_VSUB<MVEVectorVTInfo VTI>
+  : MVE_VADDSUB_m<"vsub", VTI, 0b1, sub, int_arm_mve_sub_predicated>;
 
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
-            (v16i8 (MVE_VSUBi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
-  def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
-            (v8i16 (MVE_VSUBi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
-  def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
-            (v4i32 (MVE_VSUBi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-}
+defm MVE_VADDi8  : MVE_VADD<MVE_v16i8>;
+defm MVE_VADDi16 : MVE_VADD<MVE_v8i16>;
+defm MVE_VADDi32 : MVE_VADD<MVE_v4i32>;
+
+defm MVE_VSUBi8  : MVE_VSUB<MVE_v16i8>;
+defm MVE_VSUBi16 : MVE_VSUB<MVE_v8i16>;
+defm MVE_VSUBi32 : MVE_VSUB<MVE_v4i32>;
 
 class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
-                   bits<2> size, ValueType vt>
+                   bits<2> size>
   : MVE_int<iname, suffix, size, []> {
 
   let Inst{28} = U;
@@ -1535,50 +1759,75 @@ class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
   let Inst{4} = 0b1;
   let Inst{0} = 0b0;
   let validForTailPredication = 1;
+}
 
-  ValueType VT = vt;
+class MVE_VQADD_<string suffix, bit U, bits<2> size>
+  : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size>;
+class MVE_VQSUB_<string suffix, bit U, bits<2> size>
+  : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size>;
+
+multiclass MVE_VQADD_m<MVEVectorVTInfo VTI,
+                      SDNode unpred_op, Intrinsic pred_int> {
+  def "" : MVE_VQADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated saturating add
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated saturating add
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+                           (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
 }
 
-class MVE_VQADD<string suffix, bit U, bits<2> size, ValueType VT>
-  : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, VT>;
-class MVE_VQSUB<string suffix, bit U, bits<2> size, ValueType VT>
-  : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, VT>;
+multiclass MVE_VQADD<MVEVectorVTInfo VTI, SDNode unpred_op>
  : MVE_VQADD_m<VTI, unpred_op, int_arm_mve_qadd_predicated>;
+
+defm MVE_VQADDs8  : MVE_VQADD<MVE_v16s8, saddsat>;
+defm MVE_VQADDs16 : MVE_VQADD<MVE_v8s16, saddsat>;
+defm MVE_VQADDs32 : MVE_VQADD<MVE_v4s32, saddsat>;
+defm MVE_VQADDu8  : MVE_VQADD<MVE_v16u8, uaddsat>;
+defm MVE_VQADDu16 : MVE_VQADD<MVE_v8u16, uaddsat>;
+defm MVE_VQADDu32 : MVE_VQADD<MVE_v4u32, uaddsat>;
+
+multiclass MVE_VQSUB_m<MVEVectorVTInfo VTI,
+                      SDNode unpred_op, Intrinsic pred_int> {
+  def "" : MVE_VQSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated saturating subtract
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated saturating subtract
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+                           (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
+}
 
-def MVE_VQADDs8  : MVE_VQADD<"s8", 0b0, 0b00, v16i8>;
-def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01, v8i16>;
-def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10, v4i32>;
-def MVE_VQADDu8  : MVE_VQADD<"u8", 0b1, 0b00, v16i8>;
-def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01, v8i16>;
-def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10, v4i32>;
+multiclass MVE_VQSUB<MVEVectorVTInfo VTI, SDNode unpred_op>
+  : MVE_VQSUB_m<VTI, unpred_op, int_arm_mve_qsub_predicated>;
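// Editorial note (annotation, not part of the patch): the unpredicated
// saturating forms now map onto the generic ISD saturating-arithmetic
// nodes, so for example
//   defm MVE_VQADDs8 : MVE_VQADD<MVE_v16s8, saddsat>;
// instantiates
//   Pat<(v16i8 (saddsat (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
//       (v16i8 (MVE_VQADDs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>,
// replacing the foreach-over-instr.VT loops deleted just below.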
 
-def MVE_VQSUBs8  : MVE_VQSUB<"s8", 0b0, 0b00, v16i8>;
-def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01, v8i16>;
-def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10, v4i32>;
-def MVE_VQSUBu8  : MVE_VQSUB<"u8", 0b1, 0b00, v16i8>;
-def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01, v8i16>;
-def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10, v4i32>;
+defm MVE_VQSUBs8  : MVE_VQSUB<MVE_v16s8, ssubsat>;
+defm MVE_VQSUBs16 : MVE_VQSUB<MVE_v8s16, ssubsat>;
+defm MVE_VQSUBs32 : MVE_VQSUB<MVE_v4s32, ssubsat>;
+defm MVE_VQSUBu8  : MVE_VQSUB<MVE_v16u8, usubsat>;
+defm MVE_VQSUBu16 : MVE_VQSUB<MVE_v8u16, usubsat>;
+defm MVE_VQSUBu32 : MVE_VQSUB<MVE_v4u32, usubsat>;
 
-let Predicates = [HasMVEInt] in {
-  foreach instr = [MVE_VQADDu8, MVE_VQADDu16, MVE_VQADDu32] in
-    foreach VT = [instr.VT] in
-      def : Pat<(VT (uaddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
-                (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
-  foreach instr = [MVE_VQADDs8, MVE_VQADDs16, MVE_VQADDs32] in
-    foreach VT = [instr.VT] in
-      def : Pat<(VT (saddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
-                (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
-  foreach instr = [MVE_VQSUBu8, MVE_VQSUBu16, MVE_VQSUBu32] in
-    foreach VT = [instr.VT] in
-      def : Pat<(VT (usubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
-                (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
-  foreach instr = [MVE_VQSUBs8, MVE_VQSUBs16, MVE_VQSUBs32] in
-    foreach VT = [instr.VT] in
-      def : Pat<(VT (ssubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
-                (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
-}
-
-
-class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+class MVE_VABD_int<string suffix, bit U, bits<2> size,
+                   list<dag> pattern=[]>
   : MVE_int<"vabd", suffix, size, pattern> {
 
   let Inst{28} = U;
@@ -1590,14 +1839,38 @@ class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
   let validForTailPredication = 1;
 }
 
-def MVE_VABDs8  : MVE_VABD_int<"s8", 0b0, 0b00>;
-def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
-def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
-def MVE_VABDu8  : MVE_VABD_int<"u8", 0b1, 0b00>;
-def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
-def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+multiclass MVE_VABD_m<MVEVectorVTInfo VTI,
+                      Intrinsic unpred_int, Intrinsic pred_int> {
+  def "" : MVE_VABD_int<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated absolute difference
+    def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (i32 VTI.Unsigned))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated absolute difference
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+                           (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
+}
+
+multiclass MVE_VABD<MVEVectorVTInfo VTI>
+  : MVE_VABD_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
 
-class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+defm MVE_VABDs8  : MVE_VABD<MVE_v16s8>;
+defm MVE_VABDs16 : MVE_VABD<MVE_v8s16>;
+defm MVE_VABDs32 : MVE_VABD<MVE_v4s32>;
+defm MVE_VABDu8  : MVE_VABD<MVE_v16u8>;
+defm MVE_VABDu16 : MVE_VABD<MVE_v8u16>;
+defm MVE_VABDu32 : MVE_VABD<MVE_v4u32>;
+
+class MVE_VRHADD_Base<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
   : MVE_int<"vrhadd", suffix, size, pattern> {
 
   let Inst{28} = U;
@@ -1609,12 +1882,36 @@ class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
   let validForTailPredication = 1;
 }
 
MVE_VRHADD<"s8", 0b0, 0b00>; -def MVE_VRHADDs16 : MVE_VRHADD<"s16", 0b0, 0b01>; -def MVE_VRHADDs32 : MVE_VRHADD<"s32", 0b0, 0b10>; -def MVE_VRHADDu8 : MVE_VRHADD<"u8", 0b1, 0b00>; -def MVE_VRHADDu16 : MVE_VRHADD<"u16", 0b1, 0b01>; -def MVE_VRHADDu32 : MVE_VRHADD<"u32", 0b1, 0b10>; +multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI, + SDNode unpred_op, Intrinsic pred_int> { + def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>; + defvar Inst = !cast<Instruction>(NAME); + + let Predicates = [HasMVEInt] in { + // Unpredicated rounding add-with-divide-by-two + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (i32 VTI.Unsigned))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + + // Predicated add-with-divide-by-two + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VRHADD<MVEVectorVTInfo VTI> + : MVE_VRHADD_m<VTI, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>; + +defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8>; +defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16>; +defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32>; +defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>; +defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>; +defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>; class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract, bits<2> size, list<dag> pattern=[]> @@ -1631,81 +1928,73 @@ class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract, let validForTailPredication = 1; } -class MVE_VHADD<string suffix, bit U, bits<2> size, +class MVE_VHADD_<string suffix, bit U, bits<2> size, list<dag> pattern=[]> : MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>; -class MVE_VHSUB<string suffix, bit U, bits<2> size, +class MVE_VHSUB_<string suffix, bit U, bits<2> size, list<dag> pattern=[]> : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>; -def MVE_VHADDs8 : MVE_VHADD<"s8", 0b0, 0b00>; -def MVE_VHADDs16 : MVE_VHADD<"s16", 0b0, 0b01>; -def MVE_VHADDs32 : MVE_VHADD<"s32", 0b0, 0b10>; -def MVE_VHADDu8 : MVE_VHADD<"u8", 0b1, 0b00>; -def MVE_VHADDu16 : MVE_VHADD<"u16", 0b1, 0b01>; -def MVE_VHADDu32 : MVE_VHADD<"u32", 0b1, 0b10>; - -def MVE_VHSUBs8 : MVE_VHSUB<"s8", 0b0, 0b00>; -def MVE_VHSUBs16 : MVE_VHSUB<"s16", 0b0, 0b01>; -def MVE_VHSUBs32 : MVE_VHSUB<"s32", 0b0, 0b10>; -def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>; -def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>; -def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>; +multiclass MVE_VHADD_m<MVEVectorVTInfo VTI, + SDNode unpred_op, Intrinsic pred_int> { + def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>; + defvar Inst = !cast<Instruction>(NAME); + + let Predicates = [HasMVEInt] in { + // Unpredicated add-and-divide-by-two + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + + // Predicated add-and-divide-by-two + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } +} -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (ARMvshrsImm - (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), - (v16i8 (MVE_VHADDs8 - (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; - def : Pat<(v8i16 
-  def : Pat<(v8i16 (ARMvshrsImm
-                    (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
-            (v8i16 (MVE_VHADDs16
-                    (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
-  def : Pat<(v4i32 (ARMvshrsImm
-                    (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
-            (v4i32 (MVE_VHADDs32
-                    (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
-  def : Pat<(v16i8 (ARMvshruImm
-                    (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
-            (v16i8 (MVE_VHADDu8
-                    (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
-  def : Pat<(v8i16 (ARMvshruImm
-                    (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
-            (v8i16 (MVE_VHADDu16
-                    (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
-  def : Pat<(v4i32 (ARMvshruImm
-                    (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
-            (v4i32 (MVE_VHADDu32
-                    (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
-  def : Pat<(v16i8 (ARMvshrsImm
-                    (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
-            (v16i8 (MVE_VHSUBs8
-                    (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
-  def : Pat<(v8i16 (ARMvshrsImm
-                    (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
-            (v8i16 (MVE_VHSUBs16
-                    (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
-  def : Pat<(v4i32 (ARMvshrsImm
-                    (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
-            (v4i32 (MVE_VHSUBs32
-                    (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
-  def : Pat<(v16i8 (ARMvshruImm
-                    (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
-            (v16i8 (MVE_VHSUBu8
-                    (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
-  def : Pat<(v8i16 (ARMvshruImm
-                    (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
-            (v8i16 (MVE_VHSUBu16
-                    (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
-  def : Pat<(v4i32 (ARMvshruImm
-                    (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
-            (v4i32 (MVE_VHSUBu32
-                    (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
+multiclass MVE_VHADD<MVEVectorVTInfo VTI>
+  : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated>;
+
+defm MVE_VHADDs8  : MVE_VHADD<MVE_v16s8>;
+defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16>;
+defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32>;
+defm MVE_VHADDu8  : MVE_VHADD<MVE_v16u8>;
+defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16>;
+defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32>;
+
+multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
+                      SDNode unpred_op, Intrinsic pred_int> {
+  def "" : MVE_VHSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated subtract-and-divide-by-two
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (i32 VTI.Unsigned))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated subtract-and-divide-by-two
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                           (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+                           (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
 }
 
+multiclass MVE_VHSUB<MVEVectorVTInfo VTI>
+  : MVE_VHSUB_m<VTI, int_arm_mve_vhsub, int_arm_mve_hsub_predicated>;
+
+defm MVE_VHSUBs8  : MVE_VHSUB<MVE_v16s8>;
+defm MVE_VHSUBs16 : MVE_VHSUB<MVE_v8s16>;
+defm MVE_VHSUBs32 : MVE_VHSUB<MVE_v4s32>;
+defm MVE_VHSUBu8  : MVE_VHSUB<MVE_v16u8>;
+defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16>;
+defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32>;
+
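// Editorial note (annotation, not part of the patch): vhadd/vhsub are
// halving operations: each lane computes (a + b) >> 1 or (a - b) >> 1 in
// double-width arithmetic, e.g. for u8 lanes 200 and 100, vhadd gives
// (200 + 100) >> 1 = 150 even though 300 would overflow 8 bits. That is
// exactly the shape of the ARMvshrsImm/ARMvshruImm-of-add/sub patterns
// deleted above; the int_arm_mve_vhadd/vhsub intrinsics express the same
// operation directly, with the trailing i32 flag carrying VTI.Unsigned.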
 class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
   : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
           "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
@@ -1873,6 +2162,49 @@ def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>;
 def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
 def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
 
+// int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
+// zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
+multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
+                            dag zero_vec, MVE_VQABSNEG vqabs_instruction,
+                            MVE_VQABSNEG vqneg_instruction> {
+  let Predicates = [HasMVEInt] in {
+    // The below tree can be replaced by a vqabs instruction, as it represents
+    // the following vectorized expression (r being the value in $reg):
+    // r > 0 ? r : (r == INT_MIN ? INT_MAX : -r)
+    def : Pat<(VTI.Vec (vselect
+                      (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), ARMCCgt)),
+                      (VTI.Vec MQPR:$reg),
+                      (VTI.Vec (vselect
+                                (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg),
+                                                   int_min, ARMCCeq)),
+                                int_max,
+                                (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))))),
+              (VTI.Vec (vqabs_instruction (VTI.Vec MQPR:$reg)))>;
+    // Similarly, this tree represents vqneg, i.e. the following vectorized expression:
+    // r == INT_MIN ? INT_MAX : -r
+    def : Pat<(VTI.Vec (vselect
+                      (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)),
+                      int_max,
+                      (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))),
+              (VTI.Vec (vqneg_instruction (VTI.Vec MQPR:$reg)))>;
+  }
+}
+
+defm MVE_VQABSNEG_Ps8  : vqabsneg_pattern<MVE_v16i8,
+                                          (v16i8 (ARMvmovImm (i32 3712))),
+                                          (v16i8 (ARMvmovImm (i32 3711))),
+                                          (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
+                                          MVE_VQABSs8, MVE_VQNEGs8>;
+defm MVE_VQABSNEG_Ps16 : vqabsneg_pattern<MVE_v8i16,
+                                          (v8i16 (ARMvmovImm (i32 2688))),
+                                          (v8i16 (ARMvmvnImm (i32 2688))),
+                                          (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
+                                          MVE_VQABSs16, MVE_VQNEGs16>;
+defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern<MVE_v4i32,
+                                          (v4i32 (ARMvmovImm (i32 1664))),
+                                          (v4i32 (ARMvmvnImm (i32 1664))),
+                                          (ARMvmovImm (i32 0)),
+                                          MVE_VQABSs32, MVE_VQNEGs32>;
+
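// Editorial note (values inferred from the NEON/MVE modified-immediate
// encoding, worth double-checking): the ARMvmovImm/ARMvmvnImm operands
// above look like (cmode << 8) | imm8 encodings of the splatted constants:
//   3712 = 0xE80 -> i8 splat of 0x80 (INT8_MIN),
//   3711 = 0xE7F -> i8 splat of 0x7F (INT8_MAX),
//   2688 = 0xA80 -> i16 splat of 0x8000 (INT16_MIN; vmvn gives 0x7FFF),
//   1664 = 0x680 -> i32 splat of 0x80000000 (INT32_MIN; vmvn gives
//                   0x7FFFFFFF).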
 class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
                   dag iops, list<dag> pattern=[]>
   : MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
@@ -1956,6 +2288,7 @@ class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
   let Inst{4} = 0b0;
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b1;
+  let validForTailPredication = 1;
 }
 
 def MVE_VMAXAs8 : MVE_VMINMAXA<"vmaxa", "s8", 0b00, 0b0>;
@@ -2049,8 +2382,8 @@ let Predicates = [HasMVEInt] in {
 
 class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
-                    dag immops, list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
+                    Operand immtype, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm),
                  iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
   let Inst{28} = U;
   let Inst{25-23} = 0b101;
@@ -2059,6 +2392,9 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
   let Inst{11-6} = 0b111101;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+
+  // For the MVE_VSHLL_patterns multiclass to refer to
+  Operand immediateType = immtype;
 }
 
 // The immediate VSHLL instructions accept shift counts from 1 up to
@@ -2067,7 +2403,7 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
 
 class MVE_VSHLL_imm8<string iname, string suffix,
                      bit U, bit th, list<dag> pattern=[]>
-  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
+  : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, pattern> {
   bits<3> imm;
   let Inst{20-19} = 0b01;
   let Inst{18-16} = imm;
@@ -2075,7 +2411,7 @@ class MVE_VSHLL_imm8<string iname, string suffix,
 
 class MVE_VSHLL_imm16<string iname, string suffix,
                       bit U, bit th, list<dag> pattern=[]>
-  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
+  : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, pattern> {
   bits<4> imm;
   let Inst{20} = 0b1;
   let Inst{19-16} = imm;
@@ -2119,11 +2455,50 @@ defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
 defm MVE_VSHLL_lwu8  : MVE_VSHLL_lw<"vshll", "u8", 0b00, 0b1, "$Qd, $Qm, #8">;
 defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
 
+multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
+  defvar suffix     = !strconcat(VTI.Suffix, !if(top, "th", "bh"));
+  defvar inst_imm   = !cast<MVE_VSHLL_imm>("MVE_VSHLL_imm" # suffix);
+  defvar inst_lw    = !cast<MVE_VSHLL_by_lane_width>("MVE_VSHLL_lw" # suffix);
+  defvar unpred_int = int_arm_mve_vshll_imm;
+  defvar pred_int   = int_arm_mve_vshll_imm_predicated;
+  defvar imm        = inst_imm.immediateType;
+
+  def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), imm:$imm,
+                                    (i32 VTI.Unsigned), (i32 top))),
+            (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm))>;
+  def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+                                    (i32 VTI.Unsigned), (i32 top))),
+            (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src)))>;
+
+  def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), imm:$imm,
+                                  (i32 VTI.Unsigned), (i32 top),
+                                  (VTI.Pred VCCR:$mask),
+                                  (VTI.DblVec MQPR:$inactive))),
+            (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
+                                  ARMVCCThen, (VTI.Pred VCCR:$mask),
+                                  (VTI.DblVec MQPR:$inactive)))>;
+  def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+                                  (i32 VTI.Unsigned), (i32 top),
+                                  (VTI.Pred VCCR:$mask),
+                                  (VTI.DblVec MQPR:$inactive))),
+            (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
+                                 (VTI.Pred VCCR:$mask),
+                                 (VTI.DblVec MQPR:$inactive)))>;
+}
+
+foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
  foreach top = [0, 1] in
    defm : MVE_VSHLL_patterns<VTI, top>;
+
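// Editorial note (annotation, not part of the patch): each intrinsic is
// matched twice because the immediate encodings only cover shift counts
// from 1 to lane-width-1 (mve_shift_imm1_7 / mve_shift_imm1_15 above); a
// count equal to VTI.LaneBits instead selects the separate "by lane width"
// encoding, whose assembly prints a fixed immediate, e.g.
// "vshllb.u8 q0, q1, #8" for what is presumably the MVE_VSHLL_lwu8bh
// instance here.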
+class MVE_shift_imm_partial<Operand imm, string iname, string suffix>
+  : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm),
+                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> {
+  Operand immediateType = imm;
+}
+
 class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
-                 dag immops, list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
-                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
-                  pattern> {
+                 Operand imm, list<dag> pattern=[]>
+  : MVE_shift_imm_partial<imm, iname, suffix> {
   bits<5> imm;
 
   let Inst{28} = bit_28;
@@ -2136,45 +2511,35 @@ class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
   let Inst{0} = 0b1;
 }
 
-def MVE_VRSHRNi16bh : MVE_VxSHRN<
-  "vrshrnb", "i16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
   let Inst{20-19} = 0b01;
 }
-def MVE_VRSHRNi16th : MVE_VxSHRN<
-  "vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> {
+def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> {
   let Inst{20-19} = 0b01;
 }
-def MVE_VRSHRNi32bh : MVE_VxSHRN<
-  "vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> {
   let Inst{20} = 0b1;
 }
-def MVE_VRSHRNi32th : MVE_VxSHRN<
-  "vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> {
   let Inst{20} = 0b1;
 }
-def MVE_VSHRNi16bh : MVE_VxSHRN<
-  "vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> {
   let Inst{20-19} = 0b01;
 }
-def MVE_VSHRNi16th : MVE_VxSHRN<
-  "vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> {
   let Inst{20-19} = 0b01;
 }
-def MVE_VSHRNi32bh : MVE_VxSHRN<
-  "vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> {
   let Inst{20} = 0b1;
 }
-def MVE_VSHRNi32th : MVE_VxSHRN<
-  "vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> {
   let Inst{20} = 0b1;
 }
 
-class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag immops,
-                    list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
-                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
-                  pattern> {
+class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
+                    Operand imm, list<dag> pattern=[]>
  : MVE_shift_imm_partial<imm, iname, suffix> {
   bits<5> imm;
 
   let Inst{28} = bit_28;
@@ -2188,44 +2553,42 @@ class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag imm
 }
 
 def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
-  "vqrshrunb", "s16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+  "vqrshrunb", "s16", 0b1, 0b0, shr_imm8> {
   let Inst{20-19} = 0b01;
 }
 def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
-  "vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> {
+  "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> {
   let Inst{20-19} = 0b01;
 }
 def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
-  "vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+  "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> {
   let Inst{20} = 0b1;
 }
 def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
-  "vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+  "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> {
   let Inst{20} = 0b1;
 }
 def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
-  "vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+  "vqshrunb", "s16", 0b0, 0b0, shr_imm8> {
   let Inst{20-19} = 0b01;
 }
 def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
-  "vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+  "vqshrunt", "s16", 0b0, 0b1, shr_imm8> {
   let Inst{20-19} = 0b01;
 }
 def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
-  "vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+  "vqshrunb", "s32", 0b0, 0b0, shr_imm16> {
   let Inst{20} = 0b1;
 }
 def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
-  "vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+  "vqshrunt", "s32", 0b0, 0b1, shr_imm16> {
   let Inst{20} = 0b1;
 }
 
 class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
-                   dag immops, list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
-                  iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
-                  pattern> {
+                   Operand imm, list<dag> pattern=[]>
  : MVE_shift_imm_partial<imm, iname, suffix> {
   bits<5> imm;
 
   let Inst{25-23} = 0b101;
@@ -2238,19 +2601,19 @@ class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
 }
 
 multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
-  def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+  def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8> {
    let Inst{28} = 0b0;
    let Inst{20-19} = 0b01;
  }
-  def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+  def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8> {
    let Inst{28} = 0b1;
    let Inst{20-19} = 0b01;
  }
-  def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+  def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16> {
    let Inst{28} = 0b0;
    let Inst{20} = 0b1;
  }
@@ -2261,6 +2624,63 @@ defm MVE_VQRSHRNth  : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
 defm MVE_VQSHRNbh   : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
 defm MVE_VQSHRNth   : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
 
+multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
+                              MVEVectorVTInfo OutVTI, MVEVectorVTInfo InVTI,
+                              bit q, bit r, bit top> {
+  defvar inparams = (? (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
+                       (inst.immediateType:$imm), (i32 q), (i32 r),
+                       (i32 OutVTI.Unsigned), (i32 InVTI.Unsigned), (i32 top));
+  defvar outparams = (inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
+                           (imm:$imm));
+
+  def : Pat<(OutVTI.Vec !setop(inparams, int_arm_mve_vshrn)),
+            (OutVTI.Vec outparams)>;
+  def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
+                                           (InVTI.Pred VCCR:$pred)))),
+            (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+}
+
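// NOTE (illustrative aside, not part of the patch): the dag operators used
// above deserve a gloss. `inparams` is built with the `?` placeholder, i.e.
// it is an operand list with no operator fixed yet; !setop stamps the
// unpredicated intrinsic in as the operator, and !con appends the extra
// predicate operand (and supplies the predicated intrinsic as the operator)
// for the predicated form. The same idiom in miniature, with hypothetical
// names:
//
//   defvar args = (? GPR:$a, GPR:$b);          // operator still unset
//   def : Pat<!setop(args, my_unpred_int), ...>;        // (my_unpred_int $a, $b)
//   def : Pat<!con(args, (my_pred_int GPR:$p)), ...>;   // (my_pred_int $a, $b, $p)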
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh,    MVE_v16s8, MVE_v8s16, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16th,    MVE_v16s8, MVE_v8s16, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh,    MVE_v8s16, MVE_v4s32, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32th,    MVE_v8s16, MVE_v4s32, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh,    MVE_v16u8, MVE_v8u16, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16th,    MVE_v16u8, MVE_v8u16, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh,    MVE_v8u16, MVE_v4u32, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32th,    MVE_v8u16, MVE_v4u32, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh,   MVE_v16s8, MVE_v8s16, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th,   MVE_v16s8, MVE_v8s16, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh,   MVE_v8s16, MVE_v4s32, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th,   MVE_v8s16, MVE_v4s32, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh,   MVE_v16u8, MVE_v8u16, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th,   MVE_v16u8, MVE_v8u16, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh,   MVE_v8u16, MVE_v4u32, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th,   MVE_v8u16, MVE_v4u32, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs16,   MVE_v16s8, MVE_v8s16, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNths16,   MVE_v16s8, MVE_v8s16, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs32,   MVE_v8s16, MVE_v4s32, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNths32,   MVE_v8s16, MVE_v4s32, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu16,   MVE_v16u8, MVE_v8u16, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNthu16,   MVE_v16u8, MVE_v8u16, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu32,   MVE_v8u16, MVE_v4u32, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNthu32,   MVE_v8u16, MVE_v4u32, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs16,  MVE_v16s8, MVE_v8s16, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNths16,  MVE_v16s8, MVE_v8s16, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs32,  MVE_v8s16, MVE_v4s32, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNths32,  MVE_v8s16, MVE_v4s32, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu16,  MVE_v16u8, MVE_v8u16, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu16,  MVE_v16u8, MVE_v8u16, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu32,  MVE_v8u16, MVE_v4u32, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu32,  MVE_v8u16, MVE_v4u32, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16bh,  MVE_v16u8, MVE_v8s16, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16th,  MVE_v16u8, MVE_v8s16, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32bh,  MVE_v8u16, MVE_v4s32, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32th,  MVE_v8u16, MVE_v4s32, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,1,1>;
+
 // end of mve_imm_shift instructions
 
 // start of mve_shift instructions
@@ -2293,13 +2713,31 @@ class MVE_shift_by_vec<string iname, string suffix, bit U,
   let validForTailPredication = 1;
 }
 
+multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
+  def "" : MVE_shift_by_vec<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  def : Pat<(VTI.Vec (int_arm_mve_vshl_vector
+                         (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+                         (i32 q), (i32 r), (i32 VTI.Unsigned))),
+            (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh)))>;
+
+  def : Pat<(VTI.Vec (int_arm_mve_vshl_vector_predicated
+                         (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+                         (i32 q), (i32 r), (i32 VTI.Unsigned),
+                         (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+            (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+                           ARMVCCThen, (VTI.Pred VCCR:$mask),
+                           (VTI.Vec MQPR:$inactive)))>;
+}
+
 multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
-  def s8  : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
-  def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
-  def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
-  def u8  : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
-  def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
-  def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
+  defm s8  : MVE_shift_by_vec_p<iname, MVE_v16s8, bit_4, bit_8>;
+  defm s16 : MVE_shift_by_vec_p<iname, MVE_v8s16, bit_4, bit_8>;
+  defm s32 : MVE_shift_by_vec_p<iname, MVE_v4s32, bit_4, bit_8>;
+  defm u8  : MVE_shift_by_vec_p<iname, MVE_v16u8, bit_4, bit_8>;
+  defm u16 : MVE_shift_by_vec_p<iname, MVE_v8u16, bit_4, bit_8>;
+  defm u32 : MVE_shift_by_vec_p<iname, MVE_v4u32, bit_4, bit_8>;
 }
 
 defm MVE_VSHL_by_vec : mve_shift_by_vec_multi<"vshl", 0b0, 0b0>;
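// NOTE (illustrative aside, not part of the patch): the `def "" : ...;
// defvar Inst = !cast<Instruction>(NAME);` shape in MVE_shift_by_vec_p is the
// standard trick for pairing an instruction with its own ISel patterns in a
// single multiclass: `def ""` names the instruction exactly as the outer
// defm was named (e.g. `defm s8` inside `defm MVE_VSHL_by_vec` yields
// MVE_VSHL_by_vecs8), and NAME lets the Pats refer back to that record
// without spelling the name out twice.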
@@ -2340,11 +2778,18 @@ class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
   let validForTailPredication = 1;
+
+  // For the MVE_shift_imm_patterns multiclass to refer to
+  MVEVectorVTInfo VTI;
+  Operand immediateType;
+  Intrinsic unpred_int;
+  Intrinsic pred_int;
+  dag unsignedFlag = (?);
 }
 
-class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
+class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
   : MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qd_src, MQPR:$Qm), imm),
+                       (ins MQPR:$Qd_src, MQPR:$Qm, immType:$imm),
                        "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
   bits<6> imm;
   let Inst{28} = 0b1;
@@ -2353,76 +2798,99 @@ class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
   let Inst{10-9} = 0b10;
   let Inst{8} = bit_8;
   let validForTailPredication = 1;
+
+  Operand immediateType = immType;
 }
 
-def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
+def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> {
   let Inst{21-19} = 0b001;
 }
 
-def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> {
   let Inst{21-20} = 0b01;
 }
+def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> {
+  let Inst{21-20} = 0b01;
+}
 
-def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> {
+def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> {
   let Inst{21} = 0b1;
 }
 
-def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> {
+def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> {
   let Inst{21-19} = 0b001;
 }
 
-def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> {
   let Inst{21-20} = 0b01;
 }
+def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> {
+  let Inst{21-20} = 0b01;
+}
 
-def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,(ins imm0_31:$imm)> {
+def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1, imm0_31> {
   let Inst{21} = 0b1;
 }
 
-class MVE_VQSHL_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
+                             MVEVectorVTInfo VTI> {
+  defvar inparams = (? (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
+                       (inst.immediateType:$imm));
+  defvar outparams = (inst (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
+                           (inst.immediateType:$imm));
+  defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # name);
+  defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # name # "_predicated");
+
+  def : Pat<(VTI.Vec !setop(inparams, unpred_int)),
+            (VTI.Vec outparams)>;
+  def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
+            (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+}
+
+defm : MVE_VSxI_patterns<MVE_VSLIimm8,  "vsli", MVE_v16i8>;
+defm : MVE_VSxI_patterns<MVE_VSLIimm16, "vsli", MVE_v8i16>;
+defm : MVE_VSxI_patterns<MVE_VSLIimm32, "vsli", MVE_v4i32>;
+defm : MVE_VSxI_patterns<MVE_VSRIimm8,  "vsri", MVE_v16i8>;
+defm : MVE_VSxI_patterns<MVE_VSRIimm16, "vsri", MVE_v8i16>;
+defm : MVE_VSxI_patterns<MVE_VSRIimm32, "vsri", MVE_v4i32>;
+
+class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
+  : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
+                       (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
                        vpred_r, ""> {
   bits<6> imm;
 
+  let Inst{28} = VTI_.Unsigned;
   let Inst{25-24} = 0b11;
   let Inst{21-16} = imm;
   let Inst{10-8} = 0b111;
-}
-
-def MVE_VSLIimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-19} = 0b001;
-}
-
-def MVE_VSLIimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-19} = 0b001;
-}
-
-def MVE_VSLIimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-20} = 0b01;
+
+  let VTI = VTI_;
+  let immediateType = immType;
+  let unsignedFlag = (? (i32 VTI.Unsigned));
 }
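// NOTE (illustrative aside, not part of the patch): `unsignedFlag` is an
// *optional* operand fragment. It defaults to the empty dag (?), so when a
// pattern multiclass splices it in with !con, instructions with no
// signedness distinction contribute nothing, while classes like
// MVE_VQSHL_imm above contribute one extra operand. Schematically, for
// MVE_v16s8 the spliced-in fragment is (? (i32 0)), so the matched intrinsic
// gains a trailing (i32 0) signedness operand.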
-def MVE_VSLIimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-20} = 0b01;
-}
-
-def MVE_VSLIimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21} = 0b1;
-}
-
-def MVE_VSLIimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21} = 0b1;
+let unpred_int = int_arm_mve_vqshl_imm,
+    pred_int = int_arm_mve_vqshl_imm_predicated in {
+  def MVE_VQSHLimms8 : MVE_VQSHL_imm<MVE_v16s8, imm0_7> {
+    let Inst{21-19} = 0b001;
+  }
+  def MVE_VQSHLimmu8 : MVE_VQSHL_imm<MVE_v16u8, imm0_7> {
+    let Inst{21-19} = 0b001;
+  }
+
+  def MVE_VQSHLimms16 : MVE_VQSHL_imm<MVE_v8s16, imm0_15> {
+    let Inst{21-20} = 0b01;
+  }
+  def MVE_VQSHLimmu16 : MVE_VQSHL_imm<MVE_v8u16, imm0_15> {
+    let Inst{21-20} = 0b01;
+  }
+
+  def MVE_VQSHLimms32 : MVE_VQSHL_imm<MVE_v4s32, imm0_31> {
+    let Inst{21} = 0b1;
+  }
+  def MVE_VQSHLimmu32 : MVE_VQSHL_imm<MVE_v4u32, imm0_31> {
+    let Inst{21} = 0b1;
+  }
 }
 
-class MVE_VQSHLU_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VQSHLU_imm<MVEVectorVTInfo VTI_, Operand immType>
+  : MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd),
+                       (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
                        vpred_r, ""> {
   bits<6> imm;
 
@@ -2430,61 +2898,103 @@ class MVE_VQSHLU_imm<string suffix, dag imm>
   let Inst{25-24} = 0b11;
   let Inst{21-16} = imm;
   let Inst{10-8} = 0b110;
-}
-def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
-  let Inst{21-19} = 0b001;
+
+  let VTI = VTI_;
+  let immediateType = immType;
 }
 
-def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
-  let Inst{21-20} = 0b01;
-}
+let unpred_int = int_arm_mve_vqshlu_imm,
+    pred_int = int_arm_mve_vqshlu_imm_predicated in {
+  def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<MVE_v16s8, imm0_7> {
+    let Inst{21-19} = 0b001;
+  }
 
-def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
-  let Inst{21} = 0b1;
+  def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<MVE_v8s16, imm0_15> {
+    let Inst{21-20} = 0b01;
+  }
+
+  def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<MVE_v4s32, imm0_31> {
+    let Inst{21} = 0b1;
+  }
 }
 
-class MVE_VRSHR_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VRSHR_imm<MVEVectorVTInfo VTI_, Operand immType>
  : MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd),
+                       (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
                        vpred_r, ""> {
   bits<6> imm;
 
+  let Inst{28} = VTI_.Unsigned;
   let Inst{25-24} = 0b11;
   let Inst{21-16} = imm;
   let Inst{10-8} = 0b010;
-}
-
-def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-19} = 0b001;
+
+  let VTI = VTI_;
+  let immediateType = immType;
+  let unsignedFlag = (? (i32 VTI.Unsigned));
 }
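// NOTE (illustrative aside, not part of the patch): the surrounding
// `let unpred_int = ..., pred_int = ... in { ... }` blocks are how the
// intrinsic pair is attached to each instruction record without repeating
// it per def: every def in the block inherits those two field values, and
// the generic MVE_shift_imm_patterns multiclass below reads them back off
// the record via `inst.unpred_int` / `inst.pred_int`.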
-def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-19} = 0b001;
-}
+let unpred_int = int_arm_mve_vrshr_imm,
+    pred_int = int_arm_mve_vrshr_imm_predicated in {
+  def MVE_VRSHR_imms8 : MVE_VRSHR_imm<MVE_v16s8, shr_imm8> {
+    let Inst{21-19} = 0b001;
+  }
 
-def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-20} = 0b01;
-}
+  def MVE_VRSHR_immu8 : MVE_VRSHR_imm<MVE_v16u8, shr_imm8> {
+    let Inst{21-19} = 0b001;
+  }
 
-def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-20} = 0b01;
-}
+  def MVE_VRSHR_imms16 : MVE_VRSHR_imm<MVE_v8s16, shr_imm16> {
+    let Inst{21-20} = 0b01;
+  }
 
-def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21} = 0b1;
-}
+  def MVE_VRSHR_immu16 : MVE_VRSHR_imm<MVE_v8u16, shr_imm16> {
+    let Inst{21-20} = 0b01;
+  }
 
-def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21} = 0b1;
+  def MVE_VRSHR_imms32 : MVE_VRSHR_imm<MVE_v4s32, shr_imm32> {
+    let Inst{21} = 0b1;
+  }
+
+  def MVE_VRSHR_immu32 : MVE_VRSHR_imm<MVE_v4u32, shr_imm32> {
+    let Inst{21} = 0b1;
+  }
 }
 
+multiclass MVE_shift_imm_patterns<MVE_shift_with_imm inst> {
+  def : Pat<(inst.VTI.Vec !con((inst.unpred_int (inst.VTI.Vec MQPR:$src),
+                                                inst.immediateType:$imm),
+                               inst.unsignedFlag)),
+            (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
+                                inst.immediateType:$imm))>;
+
+  def : Pat<(inst.VTI.Vec !con((inst.pred_int (inst.VTI.Vec MQPR:$src),
+                                              inst.immediateType:$imm),
+                               inst.unsignedFlag,
+                               (? (inst.VTI.Pred VCCR:$mask),
+                                  (inst.VTI.Vec MQPR:$inactive)))),
+            (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
                                inst.immediateType:$imm,
+                                ARMVCCThen, (inst.VTI.Pred VCCR:$mask),
+                                (inst.VTI.Vec MQPR:$inactive)))>;
+}
+
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu32>;
+
 class MVE_VSHR_imm<string suffix, dag imm>
   : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
                        !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
@@ -2550,27 +3060,39 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
   let Inst{21} = 0b1;
 }
 
+multiclass MVE_immediate_shift_patterns_inner<
+    MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op,
+    Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> {
+
+  def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)),
+            (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>;
+
+  def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm),
+                          !dag(pred_int, unsignedFlag, ?),
+                          (pred_int (VTI.Pred VCCR:$mask),
+                                    (VTI.Vec MQPR:$inactive)))),
+            (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
+                           ARMVCCThen, (VTI.Pred VCCR:$mask),
+                           (VTI.Vec MQPR:$inactive)))>;
+}
+
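// NOTE (illustrative aside, not part of the patch): instantiated for
// MVE_VQSHLimms8, the first MVE_shift_imm_patterns pattern above reads,
// after substituting the fields stored on that record:
//
//   def : Pat<(v16i8 (int_arm_mve_vqshl_imm (v16i8 MQPR:$src), imm0_7:$imm,
//                                           (i32 0))),
//             (v16i8 (MVE_VQSHLimms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
//
// i.e. the (i32 0) signedness operand comes from the record's unsignedFlag.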
+multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI,
+                                        Operand imm_operand_type> {
+  defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+                   ARMvshlImm, int_arm_mve_shl_imm_predicated,
+                   !cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>;
+  defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+                   ARMvshruImm, int_arm_mve_shr_imm_predicated,
+                   !cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>;
+  defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+                   ARMvshrsImm, int_arm_mve_shr_imm_predicated,
+                   !cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>;
+}
+
 let Predicates = [HasMVEInt] in {
-  def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
-            (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
-  def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
-            (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
-  def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
-            (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
-
-  def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
-            (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
-  def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
-            (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
-  def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
-            (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
-
-  def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
-            (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
-  def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
-            (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
-  def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
-            (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+  defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>;
+  defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>;
+  defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>;
 }
 
 // end of mve_shift instructions
@@ -2652,8 +3174,8 @@ class MVEFloatArithNeon<string iname, string suffix, bit size,
   let Inst{16} = 0b0;
 }
 
-class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]>
-  : MVEFloatArithNeon<"vmul", suffix, size, (outs MQPR:$Qd),
+class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
+  : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
                       (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
                       pattern> {
   bits<4> Qd;
@@ -2671,20 +3193,32 @@ class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]>
   let validForTailPredication = 1;
 }
 
-def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>;
-def MVE_VMULf16 : MVE_VMUL_fp<"f16", 0b1>;
+multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
+                          SDNode unpred_op, Intrinsic pred_int> {
+  def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;
+  defvar Inst = !cast<Instruction>(NAME);
 
-let Predicates = [HasMVEFloat] in {
-  def : Pat<(v4f32 (fmul (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
-            (v4f32 (MVE_VMULf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
-  def : Pat<(v8f16 (fmul (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
-            (v8f16 (MVE_VMULf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                                 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
+}
 
-class MVE_VCMLA<string suffix, bit size, list<dag> pattern=[]>
+multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
+  : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>;
+
+defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
+defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
+
+class MVE_VCMLA<string suffix, bit size>
   : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
                       (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
-                      "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", pattern> {
+                      "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", []> {
   bits<4> Qd;
   bits<4> Qn;
   bits<2> rot;
@@ -2701,8 +3235,31 @@ class MVE_VCMLA<string suffix, bit size, list<dag> pattern=[]>
   let Inst{4} = 0b0;
 }
 
-def MVE_VCMLAf16 : MVE_VCMLA<"f16", 0b0>;
-def MVE_VCMLAf32 : MVE_VCMLA<"f32", 0b1>;
+multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, bit size> {
+  def "" : MVE_VCMLA<VTI.Suffix, size>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(VTI.Vec (int_arm_mve_vcmlaq
+                            imm:$rot, (VTI.Vec MQPR:$Qd_src),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+                             (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                             imm:$rot))>;
+
+    def : Pat<(VTI.Vec (int_arm_mve_vcmlaq_predicated
+                            imm:$rot, (VTI.Vec MQPR:$Qd_src),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            (VTI.Pred VCCR:$mask))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qn),
+                             (VTI.Vec MQPR:$Qm), imm:$rot,
+                             ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+
+  }
+}
+
+defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16, 0b0>;
+defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, 0b1>;
 
 class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
                         bit bit_8, bit bit_21, dag iops=(ins),
@@ -2736,63 +3293,50 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
 def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
     (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
 
-let Predicates = [HasMVEFloat, UseFusedMAC] in {
-  def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1),
-                         (fmul (v8f16 MQPR:$src2),
-                               (v8f16 MQPR:$src3)))),
-            (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>;
-  def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1),
-                         (fmul (v4f32 MQPR:$src2),
-                               (v4f32 MQPR:$src3)))),
-            (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>;
-
-  def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1),
-                         (fmul (v8f16 MQPR:$src2),
-                               (v8f16 MQPR:$src3)))),
-            (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>;
-  def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1),
-                         (fmul (v4f32 MQPR:$src2),
-                               (v4f32 MQPR:$src3)))),
-            (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>;
-}
-
 let Predicates = [HasMVEFloat] in {
   def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
             (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
   def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
             (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
+  def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
+            (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
+  def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
+            (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
 }
-
-let validForTailPredication = 1 in {
-  def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
-  def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
-}
-
-let Predicates = [HasMVEFloat] in {
-  def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
-            (v4f32 (MVE_VADDf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
-  def : Pat<(v8f16 (fadd (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
-            (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
-}
+multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
+                            SDNode unpred_op, Intrinsic pred_int> {
+  def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
+    let validForTailPredication = 1;
+  }
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                                 (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
+}
 
+multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI>
+  : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated>;
+multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI>
+  : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated>;
 
-let validForTailPredication = 1 in {
-  def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
-  def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
-}
+defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>;
+defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
 
-let Predicates = [HasMVEFloat] in {
-  def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
-            (v4f32 (MVE_VSUBf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
-  def : Pat<(v8f16 (fsub (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
-            (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
-}
+defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
+defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
 
-class MVE_VCADD<string suffix, bit size, string cstr="", list<dag> pattern=[]>
+class MVE_VCADD<string suffix, bit size, string cstr="">
   : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
                       (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
-                      "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
+                      "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
   bits<4> Qd;
   bits<4> Qn;
   bit rot;
@@ -2810,8 +3354,29 @@ class MVE_VCADD<string suffix, bit size, string cstr="", list<dag> pattern=[]>
   let Inst{4} = 0b0;
 }
 
-def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>;
-def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, bit size, string cstr=""> {
+  def "" : MVE_VCADD<VTI.Suffix, size, cstr>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(VTI.Vec (int_arm_mve_vcaddq (i32 1),
+                            imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                             imm:$rot))>;
+
+    def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated (i32 1),
+                            imm:$rot, (VTI.Vec MQPR:$inactive),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            (VTI.Pred VCCR:$mask))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                             imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+
+  }
+}
+
+defm MVE_VCADDf16 : MVE_VCADD_m<MVE_v8f16, 0b0>;
+defm MVE_VCADDf32 : MVE_VCADD_m<MVE_v4f32, 0b1, "@earlyclobber $Qd">;
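// NOTE (illustrative aside, not part of the patch): the thin wrappers above
// mean a single defm now replaces an instruction def plus two hand-written
// patterns. For example
//
//   defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>;
//
// produces the instruction record MVE_VADDf32 (via `def ""`), a pattern
// selecting plain fadd onto it, and a pattern selecting the predicated
// intrinsic int_arm_mve_add_predicated onto its VPT-predicated form.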
 
 class MVE_VABD_fp<string suffix, bit size>
   : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
               "$Qd, $Qn, $Qm", vpred_r, "",
@@ -2833,8 +3398,29 @@ class MVE_VABD_fp<string suffix, bit size>
   let validForTailPredication = 1;
 }
 
-def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>;
-def MVE_VABDf16 : MVE_VABD_fp<"f16", 0b1>;
+multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
+                          Intrinsic unpred_int, Intrinsic pred_int> {
+  def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size{0}>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                                  (i32 0))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                                 (i32 0), (VTI.Pred VCCR:$mask),
+                                 (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
+}
+
+multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
+  : MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
+
+defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
+defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
 
 class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
                    Operand imm_operand_type, list<dag> pattern=[]>
@@ -3186,120 +3772,120 @@ def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>;
 def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
 def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
 
-multiclass unpred_vcmp_z<string suffix, int fc> {
-  def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))),
+multiclass unpred_vcmp_z<string suffix, PatLeaf fc> {
+  def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)),
              (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
-  def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))),
+  def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)),
              (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
-  def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))),
+  def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)),
             (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
 
-  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))),
-            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
-  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))),
-            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
-  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))),
-            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)))),
+            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)))),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)))),
+            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
 }
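// NOTE (illustrative aside, assumed definitions, not part of the patch):
// ARMCCeq, ARMCCne and friends are condition-code leaves standing for the
// literal (i32 N) condition values the old patterns spelled out directly
// (eq=0, ne=1, hs=2, hi=8, ge=10, lt=11, gt=12, le=13, matching the old
// integer arguments below), while still being printable as the VCMP
// instruction's condition operand. Conceptually something like:
//
//   def ARMCCeq : PatLeaf<(i32 0)>;   // sketch only; real definition not shown here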
 
-multiclass unpred_vcmp_r<string suffix, int fc> {
-  def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))),
+multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
+  def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)),
             (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
-  def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))),
+  def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
-  def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))),
+  def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)),
            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
 
-  def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))),
+  def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)),
             (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>;
-  def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))),
+  def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>;
-  def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))),
+  def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)),
            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>;
 
-  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))),
-            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>;
-  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))),
-            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>;
-  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))),
-            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>;
+  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))),
            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)))),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))),
            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
 
-  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))),
-            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
-  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))),
-            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
-  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))),
-            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
+  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)))),
            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)))),
            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)))),
            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
 }
 
-multiclass unpred_vcmpf_z<int fc> {
-  def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))),
+multiclass unpred_vcmpf_z<PatLeaf fc> {
+  def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)),
               (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
-  def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))),
+  def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)),
              (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
 
-  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))),
-            (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
-  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))),
-            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))),
            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))),
            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
 }
 
-multiclass unpred_vcmpf_r<int fc> {
-  def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))),
+multiclass unpred_vcmpf_r<PatLeaf fc> {
+  def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)),
               (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
-  def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))),
+  def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)),
             (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
 
-  def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))),
+  def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)),
             (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>;
-  def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))),
+  def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)),
            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
 
-  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))),
-            (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>;
-  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))),
-            (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>;
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))),
            (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))),
            (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
 
-  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))),
-            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
-  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))),
-            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)))),
            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)))),
            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
 }
 
 let Predicates = [HasMVEInt] in {
-  defm MVE_VCEQZ  : unpred_vcmp_z<"i", 0>;
-  defm MVE_VCNEZ  : unpred_vcmp_z<"i", 1>;
-  defm MVE_VCGEZ  : unpred_vcmp_z<"s", 10>;
-  defm MVE_VCLTZ  : unpred_vcmp_z<"s", 11>;
-  defm MVE_VCGTZ  : unpred_vcmp_z<"s", 12>;
-  defm MVE_VCLEZ  : unpred_vcmp_z<"s", 13>;
-  defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>;
-  defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>;
-
-  defm MVE_VCEQ  : unpred_vcmp_r<"i", 0>;
-  defm MVE_VCNE  : unpred_vcmp_r<"i", 1>;
-  defm MVE_VCGE  : unpred_vcmp_r<"s", 10>;
-  defm MVE_VCLT  : unpred_vcmp_r<"s", 11>;
-  defm MVE_VCGT  : unpred_vcmp_r<"s", 12>;
-  defm MVE_VCLE  : unpred_vcmp_r<"s", 13>;
-  defm MVE_VCGTU : unpred_vcmp_r<"u", 8>;
-  defm MVE_VCGEU : unpred_vcmp_r<"u", 2>;
+  defm MVE_VCEQZ  : unpred_vcmp_z<"i", ARMCCeq>;
+  defm MVE_VCNEZ  : unpred_vcmp_z<"i", ARMCCne>;
+  defm MVE_VCGEZ  : unpred_vcmp_z<"s", ARMCCge>;
+  defm MVE_VCLTZ  : unpred_vcmp_z<"s", ARMCClt>;
+  defm MVE_VCGTZ  : unpred_vcmp_z<"s", ARMCCgt>;
+  defm MVE_VCLEZ  : unpred_vcmp_z<"s", ARMCCle>;
+  defm MVE_VCGTUZ : unpred_vcmp_z<"u", ARMCChi>;
+  defm MVE_VCGEUZ : unpred_vcmp_z<"u", ARMCChs>;
+
+  defm MVE_VCEQ  : unpred_vcmp_r<"i", ARMCCeq>;
+  defm MVE_VCNE  : unpred_vcmp_r<"i", ARMCCne>;
+  defm MVE_VCGE  : unpred_vcmp_r<"s", ARMCCge>;
+  defm MVE_VCLT  : unpred_vcmp_r<"s", ARMCClt>;
+  defm MVE_VCGT  : unpred_vcmp_r<"s", ARMCCgt>;
+  defm MVE_VCLE  : unpred_vcmp_r<"s", ARMCCle>;
+  defm MVE_VCGTU : unpred_vcmp_r<"u", ARMCChi>;
+  defm MVE_VCGEU : unpred_vcmp_r<"u", ARMCChs>;
 }
 
 let Predicates = [HasMVEFloat] in {
-  defm MVE_VFCEQZ : unpred_vcmpf_z<0>;
-  defm MVE_VFCNEZ : unpred_vcmpf_z<1>;
-  defm MVE_VFCGEZ : unpred_vcmpf_z<10>;
-  defm MVE_VFCLTZ : unpred_vcmpf_z<11>;
-  defm MVE_VFCGTZ : unpred_vcmpf_z<12>;
-  defm MVE_VFCLEZ : unpred_vcmpf_z<13>;
+  defm MVE_VFCEQZ : unpred_vcmpf_z<ARMCCeq>;
+  defm MVE_VFCNEZ : unpred_vcmpf_z<ARMCCne>;
+  defm MVE_VFCGEZ : unpred_vcmpf_z<ARMCCge>;
+  defm MVE_VFCLTZ : unpred_vcmpf_z<ARMCClt>;
+  defm MVE_VFCGTZ : unpred_vcmpf_z<ARMCCgt>;
+  defm MVE_VFCLEZ : unpred_vcmpf_z<ARMCCle>;
 
-  defm MVE_VFCEQ : unpred_vcmpf_r<0>;
-  defm MVE_VFCNE : unpred_vcmpf_r<1>;
-  defm MVE_VFCGE : unpred_vcmpf_r<10>;
-  defm MVE_VFCLT : unpred_vcmpf_r<11>;
-  defm MVE_VFCGT : unpred_vcmpf_r<12>;
-  defm MVE_VFCLE : unpred_vcmpf_r<13>;
+  defm MVE_VFCEQ : unpred_vcmpf_r<ARMCCeq>;
+  defm MVE_VFCNE : unpred_vcmpf_r<ARMCCne>;
+  defm MVE_VFCGE : unpred_vcmpf_r<ARMCCge>;
+  defm MVE_VFCLT : unpred_vcmpf_r<ARMCClt>;
+  defm MVE_VFCGT : unpred_vcmpf_r<ARMCCgt>;
+  defm MVE_VFCLE : unpred_vcmpf_r<ARMCCle>;
 }
 
@@ -3403,10 +3989,10 @@ defm MVE_VQDMLSDHX  : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
 defm MVE_VQRDMLSDH  : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
 defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
 
-class MVE_VCMUL<string iname, string suffix, bit size, string cstr="", list<dag> pattern=[]>
+class MVE_VCMUL<string iname, string suffix, bit size, string cstr="">
   : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
                    (ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
-                   "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
+                   "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
   bits<4> Qn;
   bits<2> rot;
 
@@ -3422,8 +4008,30 @@ class MVE_VCMUL<string iname, string suffix, bit size, string cstr="", list<dag>
   let Predicates = [HasMVEFloat];
 }
 
-def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>;
-def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VCMUL_m<string iname, MVEVectorVTInfo VTI,
+                       bit size, string cstr=""> {
+  def "" : MVE_VCMUL<iname, VTI.Suffix, size, cstr>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(VTI.Vec (int_arm_mve_vcmulq
+                            imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                             imm:$rot))>;
+
+    def : Pat<(VTI.Vec (int_arm_mve_vcmulq_predicated
+                            imm:$rot, (VTI.Vec MQPR:$inactive),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            (VTI.Pred VCCR:$mask))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                             imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+
+  }
+}
+
+defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16, 0b0>;
+defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, 0b1, "@earlyclobber $Qd">;
 
 class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
                 bit T, string cstr, list<dag> pattern=[]>
@@ -3442,29 +4050,80 @@ class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
   let Inst{8} = 0b0;
   let Inst{7} = Qn{3};
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
-multiclass MVE_VMULL_multi<string iname, string suffix,
-                           bit bit_28, bits<2> bits_21_20, string cstr=""> {
-  def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0, cstr>;
-  def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1, cstr>;
+multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
+                       SDNode unpred_op, Intrinsic pred_int,
+                       bit Top, string cstr=""> {
+  def "" : MVE_VMULL<"vmull" # !if(Top, "t", "b"), VTI.Suffix, VTI.Unsigned,
+                     VTI.Size, Top, cstr>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEInt] in {
+    defvar uflag = !if(!eq(VTI.SuffixLetter, "p"), (?), (? (i32 VTI.Unsigned)));
+
+    // Unpredicated multiply
+    def : Pat<(VTI.DblVec !con((unpred_op (VTI.Vec MQPR:$Qm),
+                                          (VTI.Vec MQPR:$Qn)),
+                               uflag, (? (i32 Top)))),
+              (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated multiply
+    def : Pat<(VTI.DblVec !con((pred_int (VTI.Vec MQPR:$Qm),
+                                         (VTI.Vec MQPR:$Qn)),
+                               uflag, (? (i32 Top), (VTI.Pred VCCR:$mask),
+                                         (VTI.DblVec MQPR:$inactive)))),
+              (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                                ARMVCCThen, (VTI.Pred VCCR:$mask),
+                                (VTI.DblVec MQPR:$inactive)))>;
+  }
 }
 
-// For integer multiplies, bits 21:20 encode size, and bit 28 signedness.
-// For polynomial multiplies, bits 21:20 take the unused value 0b11, and
-// bit 28 switches to encoding the size.
-
-defm MVE_VMULLs8  : MVE_VMULL_multi<"vmull", "s8", 0b0, 0b00>;
-defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>;
-defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10, "@earlyclobber $Qd">;
-defm MVE_VMULLu8  : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>;
-defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>;
-defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Qd">;
-defm MVE_VMULLp8  : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>;
-defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>;
-
-class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size,
-                 bit round, list<dag> pattern=[]>
+// For polynomial multiplies, the size bits take the unused value 0b11, and
+// the unsigned bit switches to encoding the size.
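// NOTE (illustrative aside, not part of the patch): `uflag` in MVE_VMULL_m
// is an optional signedness fragment spliced in with !con. For MVE_v16s8 the
// unpredicated pattern therefore matches
//
//   (int_arm_mve_vmull (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn), (i32 0), (i32 Top))
//
// whereas for the polynomial type MVE_v16p8 (SuffixLetter "p") uflag is the
// empty (?) dag, so the (i32 0) signedness operand is simply absent from the
// matched int_arm_mve_vmull_poly call.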
+
+defm MVE_VMULLBs8  : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTs8  : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b0,
+                                 "@earlyclobber $Qd">;
+defm MVE_VMULLTs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b1,
+                                 "@earlyclobber $Qd">;
+
+defm MVE_VMULLBu8  : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTu8  : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b0,
+                                 "@earlyclobber $Qd">;
+defm MVE_VMULLTu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
+                                 int_arm_mve_mull_int_predicated, 0b1,
+                                 "@earlyclobber $Qd">;
+
+defm MVE_VMULLBp8  : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
+                                 int_arm_mve_mull_poly_predicated, 0b0>;
+defm MVE_VMULLTp8  : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
+                                 int_arm_mve_mull_poly_predicated, 0b1>;
+defm MVE_VMULLBp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
+                                 int_arm_mve_mull_poly_predicated, 0b0>;
+defm MVE_VMULLTp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
+                                 int_arm_mve_mull_poly_predicated, 0b1>;
+
+class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size, bit round,
+                 list<dag> pattern=[]>
   : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
                    (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
                    vpred_r, "", pattern> {
@@ -3480,19 +4139,46 @@ class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size,
   let Inst{0} = 0b1;
 }
 
-def MVE_VMULHs8   : MVE_VxMULH<"vmulh", "s8", 0b0, 0b00, 0b0>;
-def MVE_VMULHs16  : MVE_VxMULH<"vmulh", "s16", 0b0, 0b01, 0b0>;
-def MVE_VMULHs32  : MVE_VxMULH<"vmulh", "s32", 0b0, 0b10, 0b0>;
-def MVE_VMULHu8   : MVE_VxMULH<"vmulh", "u8", 0b1, 0b00, 0b0>;
-def MVE_VMULHu16  : MVE_VxMULH<"vmulh", "u16", 0b1, 0b01, 0b0>;
-def MVE_VMULHu32  : MVE_VxMULH<"vmulh", "u32", 0b1, 0b10, 0b0>;
+multiclass MVE_VxMULH_m<string iname, MVEVectorVTInfo VTI, SDNode unpred_op,
+                        Intrinsic pred_int, bit round> {
+  def "" : MVE_VxMULH<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, round>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEInt] in {
+    // Unpredicated multiply returning high bits
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                                 (i32 VTI.Unsigned))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated multiply returning high bits
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
                                 (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+                                 (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+                             ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+  }
+}
+
+multiclass MVE_VMULT<string iname, MVEVectorVTInfo VTI, bit round>
+  : MVE_VxMULH_m<iname, VTI, !if(round, int_arm_mve_vrmulh, int_arm_mve_vmulh),
+                 !if(round, int_arm_mve_rmulh_predicated,
+                            int_arm_mve_mulh_predicated),
+                 round>;
+
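// NOTE (illustrative aside, not part of the patch): the `round` bit selects
// the whole intrinsic pair at TableGen time, so e.g.
//
//   defm MVE_VRMULHs8 : MVE_VMULT<"vrmulh", MVE_v16s8, 0b1>;
//
// below is equivalent to instantiating MVE_VxMULH_m directly with
// int_arm_mve_vrmulh / int_arm_mve_rmulh_predicated.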
+defm MVE_VMULHs8   : MVE_VMULT<"vmulh", MVE_v16s8, 0b0>;
+defm MVE_VMULHs16  : MVE_VMULT<"vmulh", MVE_v8s16, 0b0>;
+defm MVE_VMULHs32  : MVE_VMULT<"vmulh", MVE_v4s32, 0b0>;
+defm MVE_VMULHu8   : MVE_VMULT<"vmulh", MVE_v16u8, 0b0>;
+defm MVE_VMULHu16  : MVE_VMULT<"vmulh", MVE_v8u16, 0b0>;
+defm MVE_VMULHu32  : MVE_VMULT<"vmulh", MVE_v4u32, 0b0>;
 
-def MVE_VRMULHs8  : MVE_VxMULH<"vrmulh", "s8", 0b0, 0b00, 0b1>;
-def MVE_VRMULHs16 : MVE_VxMULH<"vrmulh", "s16", 0b0, 0b01, 0b1>;
-def MVE_VRMULHs32 : MVE_VxMULH<"vrmulh", "s32", 0b0, 0b10, 0b1>;
-def MVE_VRMULHu8  : MVE_VxMULH<"vrmulh", "u8", 0b1, 0b00, 0b1>;
-def MVE_VRMULHu16 : MVE_VxMULH<"vrmulh", "u16", 0b1, 0b01, 0b1>;
-def MVE_VRMULHu32 : MVE_VxMULH<"vrmulh", "u32", 0b1, 0b10, 0b1>;
+defm MVE_VRMULHs8  : MVE_VMULT<"vrmulh", MVE_v16s8, 0b1>;
+defm MVE_VRMULHs16 : MVE_VMULT<"vrmulh", MVE_v8s16, 0b1>;
+defm MVE_VRMULHs32 : MVE_VMULT<"vrmulh", MVE_v4s32, 0b1>;
+defm MVE_VRMULHu8  : MVE_VMULT<"vrmulh", MVE_v16u8, 0b1>;
+defm MVE_VRMULHu16 : MVE_VMULT<"vrmulh", MVE_v8u16, 0b1>;
+defm MVE_VRMULHu32 : MVE_VMULT<"vrmulh", MVE_v4u32, 0b1>;
 
 class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
                   bits<2> size, bit T, list<dag> pattern=[]>
@@ -3551,19 +4237,36 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
   let Predicates = [HasMVEFloat];
 }
 
-multiclass MVE_VCVT_ff_halves<string suffix, bit op> {
-  def bh : MVE_VCVT_ff<"vcvtb", suffix, op, 0b0>;
-  def th : MVE_VCVT_ff<"vcvtt", suffix, op, 0b1>;
+multiclass MVE_VCVT_f2h_m<string iname, int half> {
+  def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(v8f16 (int_arm_mve_vcvt_narrow
+                         (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
+              (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
+    def : Pat<(v8f16 (int_arm_mve_vcvt_narrow_predicated
                         (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half),
+                         (v4i1 VCCR:$mask))),
+              (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
                           ARMVCCThen, (v4i1 VCCR:$mask)))>;
+  }
 }
 
-defm MVE_VCVTf16f32 : MVE_VCVT_ff_halves<"f16.f32", 0b0>;
-defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>;
+multiclass MVE_VCVT_h2f_m<string iname, int half> {
+  def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half>;
+}
+
+defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
+defm MVE_VCVTf16f32th : MVE_VCVT_f2h_m<"vcvtt", 0b1>;
+defm MVE_VCVTf32f16bh : MVE_VCVT_h2f_m<"vcvtb", 0b0>;
+defm MVE_VCVTf32f16th : MVE_VCVT_h2f_m<"vcvtt", 0b1>;
 
 class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
-                 string cstr="", list<dag> pattern=[]>
+                 string cstr="">
   : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
                    (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
-                   "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
+                   "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
   bits<4> Qn;
   bit rot;
 
@@ -3577,13 +4280,35 @@ class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
   let Inst{0} = 0b0;
 }
 
-def MVE_VCADDi8  : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>;
-def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>;
-def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VxCADD_m<string iname, MVEVectorVTInfo VTI,
+                        bit halve, string cstr=""> {
+  def "" : MVE_VxCADD<iname, VTI.Suffix, VTI.Size, halve, cstr>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  let Predicates = [HasMVEInt] in {
+    def : Pat<(VTI.Vec (int_arm_mve_vcaddq halve,
+                            imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                             imm:$rot))>;
+
+    def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated halve,
+                            imm:$rot, (VTI.Vec MQPR:$inactive),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            (VTI.Pred VCCR:$mask))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                             imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+                             (VTI.Vec MQPR:$inactive)))>;
+
+  }
+}
+
+defm MVE_VCADDi8  : MVE_VxCADD_m<"vcadd", MVE_v16i8, 0b1>;
+defm MVE_VCADDi16 : MVE_VxCADD_m<"vcadd", MVE_v8i16, 0b1>;
+defm MVE_VCADDi32 : MVE_VxCADD_m<"vcadd", MVE_v4i32, 0b1, "@earlyclobber $Qd">;
 
-def MVE_VHCADDs8  : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>;
-def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>;
-def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0, "@earlyclobber $Qd">;
+defm MVE_VHCADDs8  : MVE_VxCADD_m<"vhcadd", MVE_v16s8, 0b0>;
+defm MVE_VHCADDs16 : MVE_VxCADD_m<"vhcadd", MVE_v8s16, 0b0>;
+defm MVE_VHCADDs32 : MVE_VxCADD_m<"vhcadd", MVE_v4s32, 0b0, "@earlyclobber $Qd">;
 
 class MVE_VADCSBC<string iname, bit I, bit subtract,
                   dag carryin, list<dag> pattern=[]>
@@ -3627,6 +4352,7 @@ class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
   let Inst{8} = 0b1;
   let Inst{7} = Qn{3};
   let Inst{0} = 0b1;
+  let validForTailPredication = 1;
 }
 
 multiclass MVE_VQDMULL_halves<string suffix, bit size, string cstr=""> {
@@ -3742,6 +4468,7 @@ class MVE_VQDMULL_qr<string iname, string suffix, bit size,
   let Inst{12} = T;
   let Inst{8} = 0b1;
  let Inst{5} = 0b1;
+  let validForTailPredication = 1;
 }
 
 multiclass MVE_VQDMULL_qr_halves<string suffix, bit size, string cstr=""> {
@@ -3804,13 +4531,30 @@ class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
   let validForTailPredication = 1;
 }
 
+multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
+  def "" : MVE_VxSHL_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar
+                         (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+                         (i32 q), (i32 r), (i32 VTI.Unsigned))),
+            (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh)))>;
+
+  def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar_predicated
+                         (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+                         (i32 q), (i32 r), (i32 VTI.Unsigned),
+                         (VTI.Pred VCCR:$mask))),
+            (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+                           ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+}
+
 multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
-  def s8  : MVE_VxSHL_qr<iname, "s8", 0b0, 0b00, bit_7, bit_17>;
-  def s16 : MVE_VxSHL_qr<iname, "s16", 0b0, 0b01, bit_7, bit_17>;
-  def s32 : MVE_VxSHL_qr<iname, "s32", 0b0, 0b10, bit_7, bit_17>;
-  def u8  : MVE_VxSHL_qr<iname, "u8", 0b1, 0b00, bit_7, bit_17>;
-  def u16 : MVE_VxSHL_qr<iname, "u16", 0b1, 0b01, bit_7, bit_17>;
-  def u32 : MVE_VxSHL_qr<iname, "u32", 0b1, 0b10, bit_7, bit_17>;
+  defm s8  : MVE_VxSHL_qr_p<iname, MVE_v16s8, bit_7, bit_17>;
+  defm s16 : MVE_VxSHL_qr_p<iname, MVE_v8s16, bit_7, bit_17>;
+  defm s32 : MVE_VxSHL_qr_p<iname, MVE_v4s32, bit_7, bit_17>;
+  defm u8  : MVE_VxSHL_qr_p<iname, MVE_v16u8, bit_7, bit_17>;
+  defm u16 : MVE_VxSHL_qr_p<iname, MVE_v8u16, bit_7, bit_17>;
+  defm u32 : MVE_VxSHL_qr_p<iname, MVE_v4u32, bit_7, bit_17>;
 }
 
 defm MVE_VSHL_qr : MVE_VxSHL_qr_types<"vshl", 0b0, 0b0>;
@@ -4054,7 +4798,7 @@ def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
 def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
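// NOTE (illustrative aside, not part of the patch): for the register-shift
// forms above, e.g. `defm s8 : MVE_VxSHL_qr_p<...>` inside
// `defm MVE_VSHL_qr`, `def ""` creates the record MVE_VSHL_qrs8, and the two
// patterns select int_arm_mve_vshl_scalar and its _predicated variant onto
// it, with the q, r and Unsigned flags baked in as (i32 ...) literals that
// the intrinsic call must match exactly.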
suffix, "$Rn", vpred_n, "", pattern> { bits<4> Rn; @@ -4072,20 +4816,22 @@ class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]> let validForTailPredication = 1; } -def MVE_VCTP8 : MVE_VCTP<"8", 0b00>; -def MVE_VCTP16 : MVE_VCTP<"16", 0b01>; -def MVE_VCTP32 : MVE_VCTP<"32", 0b10>; -def MVE_VCTP64 : MVE_VCTP<"64", 0b11>; +multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> { + def "": MVE_VCTPInst<VTI.BitsSuffix, VTI.Size>; + defvar Inst = !cast<Instruction>(NAME); -let Predicates = [HasMVEInt] in { - def : Pat<(int_arm_vctp8 rGPR:$Rn), - (v16i1 (MVE_VCTP8 rGPR:$Rn))>; - def : Pat<(int_arm_vctp16 rGPR:$Rn), - (v8i1 (MVE_VCTP16 rGPR:$Rn))>; - def : Pat<(int_arm_vctp32 rGPR:$Rn), - (v4i1 (MVE_VCTP32 rGPR:$Rn))>; + let Predicates = [HasMVEInt] in { + def : Pat<(intr rGPR:$Rn), (VTI.Pred (Inst rGPR:$Rn))>; + def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)), + (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>; + } } +defm MVE_VCTP8 : MVE_VCTP<MVE_v16i8, int_arm_mve_vctp8>; +defm MVE_VCTP16 : MVE_VCTP<MVE_v8i16, int_arm_mve_vctp16>; +defm MVE_VCTP32 : MVE_VCTP<MVE_v4i32, int_arm_mve_vctp32>; +defm MVE_VCTP64 : MVE_VCTP<MVE_v2i64, int_arm_mve_vctp64>; + // end of mve_qDest_rSrc // start of coproc mov @@ -4258,6 +5004,29 @@ foreach wb = [MVE_vldst24_writeback< "vst" # n.nvecs # stage # "." # s.lanesize>; } +multiclass MVE_vst24_patterns<int lanesize, ValueType VT> { + foreach stage = [0,1] in + def : Pat<(int_arm_mve_vst2q i32:$addr, + (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)), + (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize) + (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1), + t2_addr_offset_none:$addr)>; + + foreach stage = [0,1,2,3] in + def : Pat<(int_arm_mve_vst4q i32:$addr, + (VT MQPR:$v0), (VT MQPR:$v1), + (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)), + (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize) + (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1, + VT:$v2, qsub_2, VT:$v3, qsub_3), + t2_addr_offset_none:$addr)>; +} +defm : MVE_vst24_patterns<8, v16i8>; +defm : MVE_vst24_patterns<16, v8i16>; +defm : MVE_vst24_patterns<32, v4i32>; +defm : MVE_vst24_patterns<16, v8f16>; +defm : MVE_vst24_patterns<32, v4f32>; + // end of MVE interleaving load/store // start of MVE predicable load/store @@ -4513,28 +5282,90 @@ class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz, string asm, string suffix, bit U, bits<2> size> : MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>; +// Multiclasses wrapping that to add ISel patterns for intrinsics. 
+multiclass MVE_VLDR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> { + defm "": MVE_VLDRSTR_rq_w<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter, + VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>; + defvar Inst = !cast<Instruction>(NAME); + defvar InstU = !cast<Instruction>(NAME # "_u"); + + foreach VTI = VTIs in + foreach UnsignedFlag = !if(!eq(VTI.Size, memsz.encoding), + [0,1], [VTI.Unsigned]) in { + def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag)), + (VTI.Vec (InstU GPR:$base, MQPR:$offsets))>; + def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)), + (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>; + def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))), + (VTI.Vec (InstU GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>; + def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred VCCR:$pred))), + (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>; + } +} +multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> { + def "": MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb", + VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>; + defvar Inst = !cast<Instruction>(NAME); + + foreach VTI = VTIs in { + def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)), + (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>; + def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))), + (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>; + } +} +multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> { + defm "": MVE_VLDRSTR_rq_w<MVE_st, memsz, "vstr" # memsz.MnemonicLetter, + VTIs[0].BitsSuffix, 0, VTIs[0].Size>; + defvar Inst = !cast<Instruction>(NAME); + defvar InstU = !cast<Instruction>(NAME # "_u"); + + foreach VTI = VTIs in { + def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0), + (InstU MQPR:$data, GPR:$base, MQPR:$offsets)>; + def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift), + (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>; + def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)), + (InstU MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>; + def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)), + (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>; + } +} +multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> { + def "": MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb", + VTIs[0].BitsSuffix, 0, VTIs[0].Size>; + defvar Inst = !cast<Instruction>(NAME); + + foreach VTI = VTIs in { + def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0), + (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>; + def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)), + (Inst 
MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>; + } +} + // Actually define all the loads and stores in this family. -def MVE_VLDRBU8_rq : MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u8", 1,0b00>; -def MVE_VLDRBU16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u16", 1,0b01>; -def MVE_VLDRBS16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s16", 0,0b01>; -def MVE_VLDRBU32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u32", 1,0b10>; -def MVE_VLDRBS32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s32", 0,0b10>; +defm MVE_VLDRBU8_rq : MVE_VLDR_rq_b<[MVE_v16u8,MVE_v16s8]>; +defm MVE_VLDRBU16_rq: MVE_VLDR_rq_b<[MVE_v8u16]>; +defm MVE_VLDRBS16_rq: MVE_VLDR_rq_b<[MVE_v8s16]>; +defm MVE_VLDRBU32_rq: MVE_VLDR_rq_b<[MVE_v4u32]>; +defm MVE_VLDRBS32_rq: MVE_VLDR_rq_b<[MVE_v4s32]>; -defm MVE_VLDRHU16_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u16", 1,0b01>; -defm MVE_VLDRHU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u32", 1,0b10>; -defm MVE_VLDRHS32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","s32", 0,0b10>; -defm MVE_VLDRWU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memW, "vldrw","u32", 1,0b10>; -defm MVE_VLDRDU64_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memD, "vldrd","u64", 1,0b11>; +defm MVE_VLDRHU16_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v8u16,MVE_v8s16,MVE_v8f16]>; +defm MVE_VLDRHU32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4u32]>; +defm MVE_VLDRHS32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4s32]>; +defm MVE_VLDRWU32_rq: MVE_VLDR_rq_w<MVE_memW, [MVE_v4u32,MVE_v4s32,MVE_v4f32]>; +defm MVE_VLDRDU64_rq: MVE_VLDR_rq_w<MVE_memD, [MVE_v2u64,MVE_v2s64]>; -def MVE_VSTRB8_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","8", 0,0b00>; -def MVE_VSTRB16_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","16", 0,0b01>; -def MVE_VSTRB32_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","32", 0,0b10>; +defm MVE_VSTRB8_rq : MVE_VSTR_rq_b<[MVE_v16i8]>; +defm MVE_VSTRB16_rq : MVE_VSTR_rq_b<[MVE_v8i16]>; +defm MVE_VSTRB32_rq : MVE_VSTR_rq_b<[MVE_v4i32]>; -defm MVE_VSTRH16_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","16", 0,0b01>; -defm MVE_VSTRH32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","32", 0,0b10>; -defm MVE_VSTRW32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memW, "vstrw","32", 0,0b10>; -defm MVE_VSTRD64_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memD, "vstrd","64", 0,0b11>; +defm MVE_VSTRH16_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v8i16,MVE_v8f16]>; +defm MVE_VSTRH32_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v4i32]>; +defm MVE_VSTRW32_rq : MVE_VSTR_rq_w<MVE_memW, [MVE_v4i32,MVE_v4f32]>; +defm MVE_VSTRD64_rq : MVE_VSTR_rq_w<MVE_memD, [MVE_v2i64]>; // Gather loads / scatter stores whose address operand is of the form // [Qm,#imm], i.e. a vector containing a full base address for each @@ -4573,11 +5404,58 @@ multiclass MVE_VLDRSTR_qi_m<MVE_ldst_direction dir, MVE_memsz memsz, } } +// Multiclasses wrapping that one, adding selection patterns for the +// non-writeback loads and all the stores. (The writeback loads must +// deliver multiple output values, so they have to be selected by C++ +// code.) 
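+// For instance, the MVE_VLDRWU32_qi instantiation further down should
+// produce, for the v4f32 entry of its DVTIs list, a pattern roughly
+// equivalent to this hand-written sketch:
+//
+//   def : Pat<(v4f32 (int_arm_mve_vldr_gather_base
+//                         (v4i32 MQPR:$addr), (i32 imm:$offset))),
+//             (v4f32 (MVE_VLDRWU32_qi (v4i32 MQPR:$addr),
+//                                     (i32 imm:$offset)))>;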
+multiclass MVE_VLDR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI, + list<MVEVectorVTInfo> DVTIs> { + defm "" : MVE_VLDRSTR_qi_m<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter, + "u" # memsz.TypeBits>; + defvar Inst = !cast<Instruction>(NAME); + + foreach DVTI = DVTIs in { + def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base + (AVTI.Vec MQPR:$addr), (i32 imm:$offset))), + (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset)))>; + def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base_predicated + (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (AVTI.Pred VCCR:$pred))), + (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset), + ARMVCCThen, VCCR:$pred))>; + } +} +multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI, + list<MVEVectorVTInfo> DVTIs> { + defm "" : MVE_VLDRSTR_qi_m<MVE_st, memsz, "vstr" # memsz.MnemonicLetter, + !cast<string>(memsz.TypeBits)>; + defvar Inst = !cast<Instruction>(NAME); + defvar InstPre = !cast<Instruction>(NAME # "_pre"); + + foreach DVTI = DVTIs in { + def : Pat<(int_arm_mve_vstr_scatter_base + (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data)), + (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), + (i32 imm:$offset))>; + def : Pat<(int_arm_mve_vstr_scatter_base_predicated + (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred)), + (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), + (i32 imm:$offset), ARMVCCThen, VCCR:$pred)>; + def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb + (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))), + (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), + (i32 imm:$offset)))>; + def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb_predicated + (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred))), + (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr), + (i32 imm:$offset), ARMVCCThen, VCCR:$pred))>; + } +} + // Actual instruction definitions. -defm MVE_VLDRWU32_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memW, "vldrw", "u32">; -defm MVE_VLDRDU64_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memD, "vldrd", "u64">; -defm MVE_VSTRW32_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memW, "vstrw", "32">; -defm MVE_VSTRD64_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memD, "vstrd", "64">; +defm MVE_VLDRWU32_qi: MVE_VLDR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>; +defm MVE_VLDRDU64_qi: MVE_VLDR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>; +defm MVE_VSTRW32_qi: MVE_VSTR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>; +defm MVE_VSTRD64_qi: MVE_VSTR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>; // Define aliases for all the instructions where memory size and // vector lane size are the same. These are mnemonic aliases, so they @@ -4595,21 +5473,21 @@ defm MVE_VSTRD64_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memD, "vstrd", "64">; foreach vpt_cond = ["", "t", "e"] in foreach memsz = [MVE_memB, MVE_memH, MVE_memW, MVE_memD] in foreach suffix = memsz.suffixes in { + // Define an alias with every suffix in the list, except for the one + // used by the real Instruction record (i.e. the one that all the + // rest are aliases *for*). + + if !ne(suffix, memsz.CanonLoadSuffix) then { + def : MnemonicAlias< + "vldr" # memsz.MnemonicLetter # vpt_cond # suffix, + "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>; + } - // These foreaches are conceptually ifs, implemented by iterating a - // dummy variable over a list with 0 or 1 elements depending on the - // condition. 
The idea is to iterate over _nearly_ all the suffixes - // in memsz.suffixes, but omit the one we want all the others to alias. - - foreach _ = !if(!ne(suffix, memsz.CanonLoadSuffix), [1], []<int>) in - def : MnemonicAlias< - "vldr" # memsz.MnemonicLetter # vpt_cond # suffix, - "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>; - - foreach _ = !if(!ne(suffix, memsz.CanonStoreSuffix), [1], []<int>) in - def : MnemonicAlias< - "vstr" # memsz.MnemonicLetter # vpt_cond # suffix, - "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>; + if !ne(suffix, memsz.CanonStoreSuffix) then { + def : MnemonicAlias< + "vstr" # memsz.MnemonicLetter # vpt_cond # suffix, + "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>; + } } // end of MVE predicable load/store @@ -4632,7 +5510,6 @@ class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> patte let Inst{4} = 0b0; let Defs = [VPR]; - let validForTailPredication = 1; } class MVE_VPTt1<string suffix, bits<2> size, dag iops> @@ -4644,7 +5521,6 @@ class MVE_VPTt1<string suffix, bits<2> size, dag iops> let Inst{5} = Qm{3}; let Inst{3-1} = Qm{2-0}; let Inst{0} = fc{1}; - let validForTailPredication = 1; } class MVE_VPTt1i<string suffix, bits<2> size> @@ -4746,7 +5622,6 @@ class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern= let Defs = [VPR]; let Predicates = [HasMVEFloat]; - let validForTailPredication = 1; } class MVE_VPTft1<string suffix, bit size> @@ -4816,7 +5691,6 @@ def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, let Inst{4} = 0b0; let Inst{3-1} = Qm{2-0}; let Inst{0} = 0b1; - let validForTailPredication = 1; } foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32", @@ -4826,87 +5700,87 @@ def : MVEInstAlias<"vpsel${vp}." 
# suffix # "\t$Qd, $Qn, $Qm", let Predicates = [HasMVEInt] in { def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), - (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), - (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), - (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), - (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), - (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>; def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), - (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>; + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne)))>; def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), - (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>; def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), - (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>; def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), - (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>; def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), - (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>; // Pred <-> Int def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))), - (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))), - (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))), - (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))), - (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>; def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))), - (v8i16 
(MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))),
- (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, 1))>;
+ (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))),
- (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, 1))>;
+ (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))),
- (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>;
+ (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, ARMCCne))>;
}
let Predicates = [HasMVEFloat] in {
// Pred <-> Float
// 112 is 1.0 in float
def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
// 2620 is 1.0 in half
def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
// 240 is -1.0 in float
def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
// 2748 is -1.0 in half
def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
}
def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
@@ -4955,6 +5829,8 @@ class MVE_WLSTP<string asm, bits<2> size>
let Inst{13} = 0b0;
let Inst{11} = label{0};
let Inst{10-1} = label{10-1};
+ let isBranch = 1;
+ let isTerminator = 1;
}
def MVE_DLSTP_8 : MVE_DLSTP<"dlstp.8", 0b00>;
@@ -4983,6 +5859,8 @@ def
MVE_LETP : MVE_loltp_end<(outs GPRlr:$LRout),
let Inst{13} = 0b0;
let Inst{11} = label{0};
let Inst{10-1} = label{10-1};
+ let isBranch = 1;
+ let isTerminator = 1;
}
def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
@@ -4998,61 +5876,7 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
// Patterns
//===----------------------------------------------------------------------===//
-class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
- (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
-class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
- (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred)>;
-
-multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
- int shift> {
- def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
-}
-
-class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag LoadKind, int shift>
- : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
- (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
-class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag LoadKind, int shift>
- : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
- (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred))>;
-
-multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
- int shift> {
- def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
-}
-
-class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
- PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
- (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
-
-multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
- int shift> {
- def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>;
-}
+// PatFrags for loads and stores, trying to keep the names reasonably consistent.
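+// The masked fragments below are layered: each wrapper repeats the
+// wrapped fragment's operand list and adds one more C++ predicate, so
+// e.g. aligned_sextmaskedloadvi8 only matches a masked_ld that is both
+// an i8-element access and a sign-extending load. A minimal sketch of
+// the layering, with hypothetical names (my_maskedload_i8 and
+// my_sextmaskedload_i8 mirror the real vi8 pair defined below):
+//
+//   def my_maskedload_i8
+//     : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+//               (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
+//     return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+//   }]>;
+//   def my_sextmaskedload_i8
+//     : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+//               (my_maskedload_i8 node:$ptr, node:$pred, node:$passthru), [{
+//     return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+//   }]>;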
def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (pre_store node:$val, node:$ptr, node:$offset), [{ @@ -5072,77 +5896,249 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), }]>; -def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ +def aligned_maskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast<MaskedLoadSDNode>(N); return Ld->getMemoryVT().getScalarType() == MVT::i8; }]>; -def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_sextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; }]>; -def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_zextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; }]>; -def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_extmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{ auto *Ld = cast<MaskedLoadSDNode>(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; -def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ +def aligned_maskedloadvi16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast<MaskedLoadSDNode>(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2; }]>; -def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_sextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; }]>; -def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_zextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{ return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; }]>; -def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_extmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{ auto *Ld = cast<MaskedLoadSDNode>(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; -def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, 
node:$pred, node:$passthru), [{ +def aligned_maskedloadvi32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast<MaskedLoadSDNode>(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4; }]>; -def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ +def aligned_maskedstvi8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; -def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (maskedstore8 node:$val, node:$ptr, node:$pred), [{ - return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); +def aligned_maskedstvi16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def aligned_maskedstvi32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; }]>; -def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ + +def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask), + (masked_st node:$val, node:$base, node:$offset, node:$mask), [{ + ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode(); + return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; +}]>; +def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask), + (masked_st node:$val, node:$base, node:$offset, node:$mask), [{ + ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode(); + return AM == ISD::POST_INC || AM == ISD::POST_DEC; +}]>; +def aligned_pre_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_post_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_pre_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ auto *St = cast<MaskedStoreSDNode>(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; }]>; +def aligned_post_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def aligned_pre_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, 
node:$offset, node:$mask), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; +}]>; +def aligned_post_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; +}]>; + + +// PatFrags for "Aligned" extending / truncating -def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (maskedstore16 node:$val, node:$ptr, node:$pred), [{ +def aligned_extloadvi8 : PatFrag<(ops node:$ptr), (extloadvi8 node:$ptr)>; +def aligned_sextloadvi8 : PatFrag<(ops node:$ptr), (sextloadvi8 node:$ptr)>; +def aligned_zextloadvi8 : PatFrag<(ops node:$ptr), (zextloadvi8 node:$ptr)>; + +def aligned_truncstvi8 : PatFrag<(ops node:$val, node:$ptr), + (truncstorevi8 node:$val, node:$ptr)>; +def aligned_post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi8 node:$val, node:$base, node:$offset)>; +def aligned_pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncstvi8 node:$val, node:$base, node:$offset)>; + +let MinAlignment = 2 in { + def aligned_extloadvi16 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>; + def aligned_sextloadvi16 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>; + def aligned_zextloadvi16 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>; + + def aligned_truncstvi16 : PatFrag<(ops node:$val, node:$ptr), + (truncstorevi16 node:$val, node:$ptr)>; + def aligned_post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi16 node:$val, node:$base, node:$offset)>; + def aligned_pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncstvi16 node:$val, node:$base, node:$offset)>; +} + +def truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$pred), + (masked_st node:$val, node:$base, undef, node:$pred), [{ return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); }]>; -def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ +def aligned_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$pred), + (truncmaskedst node:$val, node:$base, node:$pred), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$pred), + (truncmaskedst node:$val, node:$base, node:$pred), [{ auto *St = cast<MaskedStoreSDNode>(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); - return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; }]>; +def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (masked_st node:$val, node:$base, node:$offset, node:$pred), [{ + ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode(); + return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::PRE_INC || AM == ISD::PRE_DEC); +}]>; +def aligned_pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == 
MVT::i8; +}]>; +def aligned_pre_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (masked_st node:$val, node:$base, node:$offset, node:$postd), [{ + ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode(); + return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::POST_INC || AM == ISD::POST_DEC); +}]>; +def aligned_post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_post_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; + +// Load/store patterns + +class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst, + PatFrag StoreKind, int shift> + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>; + +class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst, + PatFrag StoreKind, int shift> + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred)>; + +multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind, + int shift> { + def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>; +} + +class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst, + PatFrag LoadKind, int shift> + : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)), + (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>; + +class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst, + PatFrag LoadKind, int shift> + : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))), + (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>; + +multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind, + int shift> { + def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>; +} + +class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode, + 
PatFrag StoreKind, int shift> + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>; + +class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode, + PatFrag StoreKind, int shift> + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr, VCCR:$pred), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, ARMVCCThen, VCCR:$pred)>; + +multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind, + int shift> { + def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>; +} + let Predicates = [HasMVEInt, IsLE] in { // Stores @@ -5220,116 +6216,73 @@ let Predicates = [HasMVEInt, IsBE] in { let Predicates = [HasMVEInt] in { // Aligned masked store, shared between LE and BE - def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, maskedstore8, 0>; - def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, maskedstore16, 1>; - def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, maskedstore16, 1>; - def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, maskedstore32, 2>; - def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, maskedstore32, 2>; - // Truncating stores - def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred), - (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; + def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, aligned_maskedstvi8, 0>; + def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, aligned_maskedstvi16, 1>; + def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, aligned_maskedstvi16, 1>; + def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, aligned_maskedstvi32, 2>; + def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, aligned_maskedstvi32, 2>; + + // Pre/Post inc masked stores + def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_pre, aligned_pre_maskedstorevi8, 0>; + def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_post, aligned_post_maskedstorevi8, 0>; + def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>; + def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>; + def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>; + def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>; + def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_pre, aligned_pre_maskedstorevi32, 2>; + def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>; + def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_pre, 
aligned_pre_maskedstorevi32, 2>; + def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>; + // Aligned masked loads - def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload8, 0>; - def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>; - def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>; - def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>; - def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>; - // Extending masked loads. - def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v8i16 NEONimmAllZerosV))), - (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v8i16 NEONimmAllZerosV))), - (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v8i16 NEONimmAllZerosV))), - (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; + def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, aligned_maskedloadvi8, 0>; + def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>; + def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>; + def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>; + def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>; } // Widening/Narrowing Loads/Stores -let MinAlignment = 2 in { - def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr), - (truncstorevi16 node:$val, node:$ptr)>; - def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), - (post_truncstvi16 node:$val, node:$base, node:$offset)>; - def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), - (pre_truncstvi16 node:$val, node:$base, node:$offset)>; -} - -let Predicates = [HasMVEInt] in { - def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr), - (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>; - def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr), - (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>; - def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr), - (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>; - - def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, 
t2am_imm7_offset<0>:$addr), - (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), - (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; - - def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), - (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; -} - - -let MinAlignment = 2 in { - def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>; - def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>; - def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>; -} - -multiclass MVEExtLoad<string DestLanes, string DestElemBits, - string SrcElemBits, string SrcElemType, - string Align, Operand am> { - def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits) - (!cast<PatFrag>("extloadvi" # SrcElemBits # Align) am:$addr)), - (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits) - am:$addr)>; - def _Z : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits) - (!cast<PatFrag>("zextloadvi" # SrcElemBits # Align) am:$addr)), - (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits) - am:$addr)>; - def _S : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits) - (!cast<PatFrag>("sextloadvi" # SrcElemBits # Align) am:$addr)), - (!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits) - am:$addr)>; +multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string StoreInst, + string Amble, ValueType VT, int Shift> { + // Trunc stores + def : Pat<(!cast<PatFrag>("aligned_truncst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr), + (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr)>; + def : Pat<(!cast<PatFrag>("aligned_post_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr), + (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>; + def : Pat<(!cast<PatFrag>("aligned_pre_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr), + (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>; + + // Masked trunc stores + def : Pat<(!cast<PatFrag>("aligned_truncmaskedst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr, VCCR:$pred), + (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred)>; + def : Pat<(!cast<PatFrag>("aligned_post_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred), + (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>; + def : Pat<(!cast<PatFrag>("aligned_pre_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred), + (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>; + + // Ext loads + def : Pat<(VT (!cast<PatFrag>("aligned_extload"#Amble) taddrmode_imm7<Shift>:$addr)), + (VT 
(LoadUInst taddrmode_imm7<Shift>:$addr))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_sextload"#Amble) taddrmode_imm7<Shift>:$addr)),
+ (VT (LoadSInst taddrmode_imm7<Shift>:$addr))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_zextload"#Amble) taddrmode_imm7<Shift>:$addr)),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
+
+ // Masked ext loads
+ def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
}
let Predicates = [HasMVEInt] in {
- defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>;
- defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>;
- defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>;
+ defm : MVEExtLoadStore<MVE_VLDRBS16, MVE_VLDRBU16, "MVE_VSTRB16", "vi8", v8i16, 0>;
+ defm : MVEExtLoadStore<MVE_VLDRBS32, MVE_VLDRBU32, "MVE_VSTRB32", "vi8", v4i32, 0>;
+ defm : MVEExtLoadStore<MVE_VLDRHS32, MVE_VLDRHU32, "MVE_VSTRH32", "vi16", v4i32, 1>;
}
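+// As a concrete check of MVEExtLoadStore, its first instantiation above
+// should expand the sext-load and trunc-store cases to patterns roughly
+// equivalent to these hand-written sketches:
+//
+//   def : Pat<(v8i16 (aligned_sextloadvi8 taddrmode_imm7<0>:$addr)),
+//             (v8i16 (MVE_VLDRBS16 taddrmode_imm7<0>:$addr))>;
+//   def : Pat<(aligned_truncstvi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr),
+//             (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>;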