Diffstat (limited to 'llvm/lib/Target/ARM/ARMInstrMVE.td')
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td | 2779
1 file changed, 1866 insertions(+), 913 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 4f67cd6e47cc..604291be822c 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -275,6 +275,83 @@ class mve_addr_q_shift<int shift> : MemOperand {
let MIOperandInfo = (ops MQPR:$base, i32imm:$imm);
}
+// A family of classes wrapping up information about the vector types
+// used by MVE.
+class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
+ bits<2> size, string suffixletter, bit unsigned> {
+ // The LLVM ValueType representing the vector, so we can use it in
+ // ISel patterns.
+ ValueType Vec = vec;
+
+ // The LLVM ValueType representing a vector with elements double the size
+ // of those in Vec, so we can use it in ISel patterns. It is up to the
+ // invoker of this class to ensure that this is a correct choice.
+ ValueType DblVec = dblvec;
+
+ // An LLVM ValueType representing a corresponding vector of
+ // predicate bits, for use in ISel patterns that handle an IR
+ // intrinsic describing the predicated form of the instruction.
+ //
+ // Usually, for a vector of N things, this will be vNi1. But for
+ // vectors of 2 values, we make an exception, and use v4i1 instead
+ // of v2i1. Rationale: MVE codegen doesn't support doing all the
+ // auxiliary operations on v2i1 (vector shuffles etc), and also,
+ // there's no MVE compare instruction that will _generate_ v2i1
+ // directly.
+ ValueType Pred = pred;
+
+ // The most common representation of the vector element size in MVE
+ // instruction encodings: a 2-bit value V representing an (8<<V)-bit
+ // vector element.
+ bits<2> Size = size;
+
+  // For vectors of explicitly signed or unsigned integers: 0 for
+  // signed and 1 for unsigned. For anything else, undefined.
+ bit Unsigned = unsigned;
+
+ // The number of bits in a vector element, in integer form.
+ int LaneBits = !shl(8, Size);
+
+  // The suffix used in assembly language on an instruction operating
+  // on this type, if it only cares about the number of bits per lane.
+ string BitsSuffix = !if(!eq(suffixletter, "p"),
+ !if(!eq(unsigned, 0b0), "8", "16"),
+ !cast<string>(LaneBits));
+
+ // The suffix used on an instruction that mentions the whole type.
+ string Suffix = suffixletter ## BitsSuffix;
+
+ // The letter part of the suffix only.
+ string SuffixLetter = suffixletter;
+}
+
+// Integer vector types that don't treat signed and unsigned differently.
+def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "i", ?>;
+def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "i", ?>;
+def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "i", ?>;
+def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "i", ?>;
+
+// Explicitly signed and unsigned integer vectors. They map to the
+// same set of LLVM ValueTypes as above, but are represented
+// differently in assembly and instruction encodings.
+def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "s", 0b0>;
+def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "s", 0b0>;
+def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "s", 0b0>;
+def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "s", 0b0>;
+def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "u", 0b1>;
+def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "u", 0b1>;
+def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "u", 0b1>;
+def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "u", 0b1>;
+
+// FP vector types.
+def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, 0b01, "f", ?>;
+def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, 0b10, "f", ?>;
+def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, 0b11, "f", ?>;
+
+// Polynomial vector types.
+def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b11, "p", 0b0>;
+def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b11, "p", 0b1>;
+
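As a worked example of how these records resolve (an editorial sketch, not part of the patch), the fields of MVE_v8s16 evaluate, per the class body above, to:

    // Effective field values of MVE_v8s16.
    Vec        = v8i16
    DblVec     = v4i32
    Pred       = v8i1
    Size       = 0b01
    Unsigned   = 0b0
    LaneBits   = !shl(8, 0b01)   // = 16
    BitsSuffix = "16"            // suffixletter is "s", not "p"
    Suffix     = "s" # "16"      // = "s16"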
// --------- Start of base classes for the instructions themselves
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
@@ -346,9 +423,12 @@ class MVE_ScalarShiftSingleReg<string iname, dag iops, string asm, string cstr,
let Inst{19-16} = RdaDest{3-0};
}
-class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4, list<dag> pattern=[]>
+class MVE_ScalarShiftSRegImm<string iname, bits<2> op5_4>
: MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, long_shift:$imm),
- "$RdaSrc, $imm", "$RdaDest = $RdaSrc", pattern> {
+ "$RdaSrc, $imm", "$RdaDest = $RdaSrc",
+ [(set rGPR:$RdaDest,
+ (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
+ (i32 rGPR:$RdaSrc), (i32 imm:$imm))))]> {
bits<5> imm;
let Inst{15} = 0b0;
@@ -364,9 +444,12 @@ def MVE_SRSHR : MVE_ScalarShiftSRegImm<"srshr", 0b10>;
def MVE_UQSHL : MVE_ScalarShiftSRegImm<"uqshl", 0b00>;
def MVE_URSHR : MVE_ScalarShiftSRegImm<"urshr", 0b01>;
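For concreteness, with iname = "srshr" the !cast above resolves the intrinsic by name, so MVE_SRSHR ends up carrying a pattern equivalent to the following (a sketch of the expansion, assuming the int_arm_mve_srshr intrinsic declared alongside the other MVE intrinsics):

    // Sketch: the ISel pattern instantiated for MVE_SRSHR.
    [(set rGPR:$RdaDest,
          (i32 (int_arm_mve_srshr (i32 rGPR:$RdaSrc), (i32 imm:$imm))))]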
-class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4, list<dag> pattern=[]>
+class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4>
: MVE_ScalarShiftSingleReg<iname, (ins rGPR:$RdaSrc, rGPR:$Rm),
- "$RdaSrc, $Rm", "$RdaDest = $RdaSrc", pattern> {
+ "$RdaSrc, $Rm", "$RdaDest = $RdaSrc",
+ [(set rGPR:$RdaDest,
+ (i32 (!cast<Intrinsic>("int_arm_mve_" # iname)
+ (i32 rGPR:$RdaSrc), (i32 rGPR:$Rm))))]> {
bits<4> Rm;
let Inst{15-12} = Rm{3-0};
@@ -487,10 +570,10 @@ class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
let Inst{4} = 0b0;
}
-class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+class MVE_VABAV<string suffix, bit U, bits<2> size>
: MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
- pattern> {
+ []> {
bits<4> Qm;
bits<4> Qn;
bits<4> Rda;
@@ -509,12 +592,35 @@ class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let Inst{0} = 0b1;
}
-def MVE_VABAVs8 : MVE_VABAV<"s8", 0b0, 0b00>;
-def MVE_VABAVs16 : MVE_VABAV<"s16", 0b0, 0b01>;
-def MVE_VABAVs32 : MVE_VABAV<"s32", 0b0, 0b10>;
-def MVE_VABAVu8 : MVE_VABAV<"u8", 0b1, 0b00>;
-def MVE_VABAVu16 : MVE_VABAV<"u16", 0b1, 0b01>;
-def MVE_VABAVu32 : MVE_VABAV<"u32", 0b1, 0b10>;
+multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
+ def "" : MVE_VABAV<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 (int_arm_mve_vabav
+ (i32 VTI.Unsigned),
+ (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (i32 (Inst (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(i32 (int_arm_mve_vabav_predicated
+ (i32 VTI.Unsigned),
+ (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (i32 (Inst (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
+}
+
+defm MVE_VABAVs8 : MVE_VABAV_m<MVE_v16s8>;
+defm MVE_VABAVs16 : MVE_VABAV_m<MVE_v8s16>;
+defm MVE_VABAVs32 : MVE_VABAV_m<MVE_v4s32>;
+defm MVE_VABAVu8 : MVE_VABAV_m<MVE_v16u8>;
+defm MVE_VABAVu16 : MVE_VABAV_m<MVE_v8u16>;
+defm MVE_VABAVu32 : MVE_VABAV_m<MVE_v4u32>;
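Each defm above therefore emits one instruction plus two ISel patterns. Substituting MVE_v16s8, the unpredicated half of MVE_VABAVs8 expands roughly to (sketch):

    // Sketch: expansion of MVE_VABAV_m<MVE_v16s8>, unpredicated pattern only.
    def MVE_VABAVs8 : MVE_VABAV<"s8", 0b0, 0b00>;
    def : Pat<(i32 (int_arm_mve_vabav (i32 0), (i32 rGPR:$Rda_src),
                                      (v16i8 MQPR:$Qn), (v16i8 MQPR:$Qm))),
              (i32 (MVE_VABAVs8 (i32 rGPR:$Rda_src),
                                (v16i8 MQPR:$Qn), (v16i8 MQPR:$Qm)))>;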
class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, bits<2> size, list<dag> pattern=[]>
@@ -658,17 +764,31 @@ class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
let Inst{0} = 0b0;
}
-multiclass MVE_VMINMAXV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
- def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b1, bit_7>;
- def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b1, bit_7>;
- def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b1, bit_7>;
- def u8 : MVE_VMINMAXV<iname, "u8", 0b1, 0b00, 0b1, bit_7>;
- def u16 : MVE_VMINMAXV<iname, "u16", 0b1, 0b01, 0b1, bit_7>;
- def u32 : MVE_VMINMAXV<iname, "u32", 0b1, 0b10, 0b1, bit_7>;
+multiclass MVE_VMINMAXV_p<string iname, bit bit_17, bit bit_7,
+ MVEVectorVTInfo VTI, Intrinsic intr> {
+ def "": MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
+ bit_17, bit_7>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in
+ def _pat : Pat<(i32 (intr (i32 rGPR:$prev), (VTI.Vec MQPR:$vec))),
+ (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
+}
+
+multiclass MVE_VMINMAXV_ty<string iname, bit bit_7,
+ Intrinsic intr_s, Intrinsic intr_u> {
+ defm s8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16s8, intr_s>;
+ defm s16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8s16, intr_s>;
+ defm s32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4s32, intr_s>;
+ defm u8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16u8, intr_u>;
+ defm u16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8u16, intr_u>;
+ defm u32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4u32, intr_u>;
}
-defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>;
-defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>;
+defm MVE_VMINV : MVE_VMINMAXV_ty<
+ "vminv", 0b1, int_arm_mve_minv_s, int_arm_mve_minv_u>;
+defm MVE_VMAXV : MVE_VMINMAXV_ty<
+ "vmaxv", 0b0, int_arm_mve_maxv_s, int_arm_mve_maxv_u>;
let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
@@ -709,10 +829,9 @@ defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
- bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]>
+ bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
: MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
- "$RdaDest, $Qn, $Qm", cstr, pattern> {
+ "$RdaDest, $Qn, $Qm", cstr, []> {
bits<4> RdaDest;
bits<3> Qm;
bits<3> Qn;
@@ -730,47 +849,88 @@ class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
let Inst{0} = bit_0;
}
-multiclass MVE_VMLAMLSDAV_A<string iname, string x, string suffix,
- bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]> {
- def ""#x#suffix : MVE_VMLAMLSDAV<iname # x, suffix,
+multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
+ bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> {
+ def ""#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # x, VTI.Suffix,
(ins MQPR:$Qn, MQPR:$Qm), "",
- sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
- def "a"#x#suffix : MVE_VMLAMLSDAV<iname # "a" # x, suffix,
+ sz, bit_28, 0b0, X, bit_8, bit_0>;
+ def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # "a" # x, VTI.Suffix,
(ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
"$RdaDest = $RdaSrc",
- sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
+ sz, bit_28, 0b1, X, bit_8, bit_0>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 (int_arm_mve_vmldava
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 0) /* accumulator */,
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(i32 (int_arm_mve_vmldava_predicated
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 0) /* accumulator */,
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+
+ def : Pat<(i32 (int_arm_mve_vmldava
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(i32 (int_arm_mve_vmldava_predicated
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
}
-multiclass MVE_VMLAMLSDAV_AX<string iname, string suffix, bit sz, bit bit_28,
- bit bit_8, bit bit_0, list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_A<iname, "", suffix, sz, bit_28,
- 0b0, bit_8, bit_0, pattern>;
- defm "" : MVE_VMLAMLSDAV_A<iname, "x", suffix, sz, bit_28,
- 0b1, bit_8, bit_0, pattern>;
+multiclass MVE_VMLAMLSDAV_AX<string iname, MVEVectorVTInfo VTI, bit sz,
+ bit bit_28, bit bit_8, bit bit_0> {
+ defm "" : MVE_VMLAMLSDAV_A<iname, "", VTI, sz, bit_28,
+ 0b0, bit_8, bit_0>;
+ defm "" : MVE_VMLAMLSDAV_A<iname, "x", VTI, sz, bit_28,
+ 0b1, bit_8, bit_0>;
}
-multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit bit_8,
- list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix,
- sz, 0b0, bit_8, 0b0, pattern>;
- defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix,
- sz, 0b1, 0b0, bit_8, 0b0, pattern>;
+multiclass MVE_VMLADAV_multi<MVEVectorVTInfo SVTI, MVEVectorVTInfo UVTI,
+ bit sz, bit bit_8> {
+ defm "" : MVE_VMLAMLSDAV_AX<"vmladav", SVTI,
+ sz, 0b0, bit_8, 0b0>;
+ defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", UVTI,
+ sz, 0b1, 0b0, bit_8, 0b0>;
}
-multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28,
- list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix,
- sz, bit_28, 0b0, 0b1, pattern>;
+multiclass MVE_VMLSDAV_multi<MVEVectorVTInfo VTI, bit sz, bit bit_28> {
+ defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", VTI,
+ sz, bit_28, 0b0, 0b1>;
}
-defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v16s8, MVE_v16u8, 0b0, 0b1>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v8s16, MVE_v8u16, 0b0, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v4s32, MVE_v4u32, 0b1, 0b0>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v16s8, 0b0, 0b1>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v8s16, 0b0, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v4s32, 0b1, 0b0>;
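The four patterns per variant distinguish the accumulating and plain forms purely by the accumulator operand: a literal (i32 0) selects the non-'a' instruction, and a live register selects the 'a' one. For example, the first pattern instantiated for the signed 8-bit vmladav looks like (sketch):

    // Sketch: non-accumulating, unpredicated pattern for MVE_VMLADAVs8
    // (Unsigned = 0, subtract = 0, exchange = 0, accumulator = 0).
    def : Pat<(i32 (int_arm_mve_vmldava (i32 0), (i32 0), (i32 0), (i32 0),
                                        (v16i8 MQPR:$Qn), (v16i8 MQPR:$Qm))),
              (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$Qn), (v16i8 MQPR:$Qm)))>;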
// vmlav aliases vmladav
foreach acc = ["", "a"] in {
@@ -932,6 +1092,16 @@ let Predicates = [HasMVEFloat] in {
(v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
(v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+ def : Pat<(v4f32 (int_arm_mve_max_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), (i32 0),
+ (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+ (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ ARMVCCThen, (v4i1 VCCR:$mask),
+ (v4f32 MQPR:$inactive)))>;
+ def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), (i32 0),
+ (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+ (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ ARMVCCThen, (v8i1 VCCR:$mask),
+ (v8f16 MQPR:$inactive)))>;
}
def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>;
@@ -942,6 +1112,16 @@ let Predicates = [HasMVEFloat] in {
(v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
(v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+ def : Pat<(v4f32 (int_arm_mve_min_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ (i32 0), (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+ (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2),
+ ARMVCCThen, (v4i1 VCCR:$mask),
+ (v4f32 MQPR:$inactive)))>;
+ def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ (i32 0), (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+ (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2),
+ ARMVCCThen, (v8i1 VCCR:$mask),
+ (v8f16 MQPR:$inactive)))>;
}
@@ -957,50 +1137,48 @@ class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
let Inst{8} = 0b0;
let Inst{6} = 0b1;
let Inst{4} = bit_4;
+ let validForTailPredication = 1;
}
-multiclass MVE_VMINMAX_all_sizes<string iname, bit bit_4> {
- def s8 : MVE_VMINMAX<iname, "s8", 0b0, 0b00, bit_4>;
- def s16 : MVE_VMINMAX<iname, "s16", 0b0, 0b01, bit_4>;
- def s32 : MVE_VMINMAX<iname, "s32", 0b0, 0b10, bit_4>;
- def u8 : MVE_VMINMAX<iname, "u8", 0b1, 0b00, bit_4>;
- def u16 : MVE_VMINMAX<iname, "u16", 0b1, 0b01, bit_4>;
- def u32 : MVE_VMINMAX<iname, "u32", 0b1, 0b10, bit_4>;
-}
+multiclass MVE_VMINMAX_m<string iname, bit bit_4, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VMINMAX<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, bit_4>;
+ defvar Inst = !cast<Instruction>(NAME);
-defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>;
-defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>;
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated min/max
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ // Predicated min/max
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
+multiclass MVE_VMAX<MVEVectorVTInfo VTI>
+ : MVE_VMINMAX_m<"vmax", 0b0, VTI, !if(VTI.Unsigned, umax, smax), int_arm_mve_max_predicated>;
+multiclass MVE_VMIN<MVEVectorVTInfo VTI>
+ : MVE_VMINMAX_m<"vmin", 0b1, VTI, !if(VTI.Unsigned, umin, smin), int_arm_mve_min_predicated>;
+
+defm MVE_VMINs8 : MVE_VMIN<MVE_v16s8>;
+defm MVE_VMINs16 : MVE_VMIN<MVE_v8s16>;
+defm MVE_VMINs32 : MVE_VMIN<MVE_v4s32>;
+defm MVE_VMINu8 : MVE_VMIN<MVE_v16u8>;
+defm MVE_VMINu16 : MVE_VMIN<MVE_v8u16>;
+defm MVE_VMINu32 : MVE_VMIN<MVE_v4u32>;
+
+defm MVE_VMAXs8 : MVE_VMAX<MVE_v16s8>;
+defm MVE_VMAXs16 : MVE_VMAX<MVE_v8s16>;
+defm MVE_VMAXs32 : MVE_VMAX<MVE_v4s32>;
+defm MVE_VMAXu8 : MVE_VMAX<MVE_v16u8>;
+defm MVE_VMAXu16 : MVE_VMAX<MVE_v8u16>;
+defm MVE_VMAXu32 : MVE_VMAX<MVE_v4u32>;
+
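The !if on VTI.Unsigned is what lets a single multiclass cover both signednesses: MVE_VMINu8 binds umin as its unpredicated node, while MVE_VMINs8 binds smin. One instantiation, as a sketch:

    // Sketch: unpredicated pattern instantiated for MVE_VMINu8.
    def : Pat<(v16i8 (umin (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
              (v16i8 (MVE_VMINu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;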
// end of mve_comp instructions
// start of mve_bit instructions
@@ -1150,53 +1328,61 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f
(MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
- (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
- (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
- (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
- (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
- (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
- (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
- (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
-
- def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
- (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
- (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
- (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
- def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
- (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
+multiclass MVE_bit_op<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated operation
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ // Predicated operation
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (instruction
+ (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+defm : MVE_bit_op<MVE_v16i8, and, int_arm_mve_and_predicated, MVE_VAND>;
+defm : MVE_bit_op<MVE_v8i16, and, int_arm_mve_and_predicated, MVE_VAND>;
+defm : MVE_bit_op<MVE_v4i32, and, int_arm_mve_and_predicated, MVE_VAND>;
+defm : MVE_bit_op<MVE_v2i64, and, int_arm_mve_and_predicated, MVE_VAND>;
+
+defm : MVE_bit_op<MVE_v16i8, or, int_arm_mve_orr_predicated, MVE_VORR>;
+defm : MVE_bit_op<MVE_v8i16, or, int_arm_mve_orr_predicated, MVE_VORR>;
+defm : MVE_bit_op<MVE_v4i32, or, int_arm_mve_orr_predicated, MVE_VORR>;
+defm : MVE_bit_op<MVE_v2i64, or, int_arm_mve_orr_predicated, MVE_VORR>;
+
+defm : MVE_bit_op<MVE_v16i8, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+defm : MVE_bit_op<MVE_v8i16, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+defm : MVE_bit_op<MVE_v4i32, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+defm : MVE_bit_op<MVE_v2i64, xor, int_arm_mve_eor_predicated, MVE_VEOR>;
+
+multiclass MVE_bit_op_with_inv<MVEVectorVTInfo VTI, SDNode unpred_op, Intrinsic pred_int, MVE_bit_ops instruction> {
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated operation
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (vnotq (VTI.Vec MQPR:$Qn)))),
+ (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ // Predicated operation
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (instruction
+ (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
+defm : MVE_bit_op_with_inv<MVE_v16i8, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+defm : MVE_bit_op_with_inv<MVE_v8i16, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+defm : MVE_bit_op_with_inv<MVE_v4i32, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+defm : MVE_bit_op_with_inv<MVE_v2i64, and, int_arm_mve_bic_predicated, MVE_VBIC>;
+
+defm : MVE_bit_op_with_inv<MVE_v16i8, or, int_arm_mve_orn_predicated, MVE_VORN>;
+defm : MVE_bit_op_with_inv<MVE_v8i16, or, int_arm_mve_orn_predicated, MVE_VORN>;
+defm : MVE_bit_op_with_inv<MVE_v4i32, or, int_arm_mve_orn_predicated, MVE_VORN>;
+defm : MVE_bit_op_with_inv<MVE_v2i64, or, int_arm_mve_orn_predicated, MVE_VORN>;
+
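Each defm line stamps out the two patterns for one type/instruction pairing; for instance, the v4i32 AND line above expands roughly to (sketch):

    // Sketch: expansion of
    // MVE_bit_op<MVE_v4i32, and, int_arm_mve_and_predicated, MVE_VAND>.
    def : Pat<(v4i32 (and (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
              (v4i32 (MVE_VAND (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
    def : Pat<(v4i32 (int_arm_mve_and_predicated
                          (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn),
                          (v4i1 VCCR:$mask), (v4i32 MQPR:$inactive))),
              (v4i32 (MVE_VAND (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn),
                               ARMVCCThen, (v4i1 VCCR:$mask),
                               (v4i32 MQPR:$inactive)))>;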
class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
: MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
@@ -1429,8 +1615,9 @@ class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
let Inst{3-1} = Qm{2-0};
}
-class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_int<"vmul", suffix, size, pattern> {
+class MVE_VMULt1<string iname, string suffix, bits<2> size,
+ list<dag> pattern=[]>
+ : MVE_int<iname, suffix, size, pattern> {
let Inst{28} = 0b0;
let Inst{25-23} = 0b110;
@@ -1438,22 +1625,36 @@ class MVE_VMULt1<string suffix, bits<2> size, list<dag> pattern=[]>
let Inst{12-8} = 0b01001;
let Inst{4} = 0b1;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
-def MVE_VMULt1i8 : MVE_VMULt1<"i8", 0b00>;
-def MVE_VMULt1i16 : MVE_VMULt1<"i16", 0b01>;
-def MVE_VMULt1i32 : MVE_VMULt1<"i32", 0b10>;
+multiclass MVE_VMUL_m<string iname, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VMULt1<iname, VTI.Suffix, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VMULt1i8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VMULt1i16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VMULt1i32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated multiply
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated multiply
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
-class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
+multiclass MVE_VMUL<MVEVectorVTInfo VTI>
+ : MVE_VMUL_m<"vmul", VTI, mul, int_arm_mve_mul_predicated>;
+
+defm MVE_VMULi8 : MVE_VMUL<MVE_v16i8>;
+defm MVE_VMULi16 : MVE_VMUL<MVE_v8i16>;
+defm MVE_VMULi32 : MVE_VMUL<MVE_v4i32>;
+
+class MVE_VQxDMULH_Base<string iname, string suffix, bits<2> size, bit rounding,
list<dag> pattern=[]>
: MVE_int<iname, suffix, size, pattern> {
@@ -1465,18 +1666,40 @@ class MVE_VQxDMULH<string iname, string suffix, bits<2> size, bit rounding,
let Inst{0} = 0b0;
}
-class MVE_VQDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_VQxDMULH<"vqdmulh", suffix, size, 0b0, pattern>;
-class MVE_VQRDMULH<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_VQxDMULH<"vqrdmulh", suffix, size, 0b1, pattern>;
+multiclass MVE_VQxDMULH_m<string iname, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int,
+ bit rounding> {
+ def "" : MVE_VQxDMULH_Base<iname, VTI.Suffix, VTI.Size, rounding>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VQDMULHi8 : MVE_VQDMULH<"s8", 0b00>;
-def MVE_VQDMULHi16 : MVE_VQDMULH<"s16", 0b01>;
-def MVE_VQDMULHi32 : MVE_VQDMULH<"s32", 0b10>;
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated multiply
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated multiply
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-def MVE_VQRDMULHi8 : MVE_VQRDMULH<"s8", 0b00>;
-def MVE_VQRDMULHi16 : MVE_VQRDMULH<"s16", 0b01>;
-def MVE_VQRDMULHi32 : MVE_VQRDMULH<"s32", 0b10>;
+multiclass MVE_VQxDMULH<string iname, MVEVectorVTInfo VTI, bit rounding>
+ : MVE_VQxDMULH_m<iname, VTI, !if(rounding, int_arm_mve_vqrdmulh,
+ int_arm_mve_vqdmulh),
+ !if(rounding, int_arm_mve_qrdmulh_predicated,
+ int_arm_mve_qdmulh_predicated),
+ rounding>;
+
+defm MVE_VQDMULHi8 : MVE_VQxDMULH<"vqdmulh", MVE_v16s8, 0b0>;
+defm MVE_VQDMULHi16 : MVE_VQxDMULH<"vqdmulh", MVE_v8s16, 0b0>;
+defm MVE_VQDMULHi32 : MVE_VQxDMULH<"vqdmulh", MVE_v4s32, 0b0>;
+
+defm MVE_VQRDMULHi8 : MVE_VQxDMULH<"vqrdmulh", MVE_v16s8, 0b1>;
+defm MVE_VQRDMULHi16 : MVE_VQxDMULH<"vqrdmulh", MVE_v8s16, 0b1>;
+defm MVE_VQRDMULHi32 : MVE_VQxDMULH<"vqrdmulh", MVE_v4s32, 0b1>;
class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
list<dag> pattern=[]>
@@ -1491,39 +1714,40 @@ class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract,
let validForTailPredication = 1;
}
-class MVE_VADD<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_VADDSUB<"vadd", suffix, size, 0b0, pattern>;
-class MVE_VSUB<string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_VADDSUB<"vsub", suffix, size, 0b1, pattern>;
+multiclass MVE_VADDSUB_m<string iname, MVEVectorVTInfo VTI, bit subtract,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VADDSUB<iname, VTI.Suffix, VTI.Size, subtract>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VADDi8 : MVE_VADD<"i8", 0b00>;
-def MVE_VADDi16 : MVE_VADD<"i16", 0b01>;
-def MVE_VADDi32 : MVE_VADD<"i32", 0b10>;
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated add/subtract
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VADDi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VADDi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VADDi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
+ // Predicated add/subtract
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
-def MVE_VSUBi8 : MVE_VSUB<"i8", 0b00>;
-def MVE_VSUBi16 : MVE_VSUB<"i16", 0b01>;
-def MVE_VSUBi32 : MVE_VSUB<"i32", 0b10>;
+multiclass MVE_VADD<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_m<"vadd", VTI, 0b0, add, int_arm_mve_add_predicated>;
+multiclass MVE_VSUB<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_m<"vsub", VTI, 0b1, sub, int_arm_mve_sub_predicated>;
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
- (v16i8 (MVE_VSUBi8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
- def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
- (v8i16 (MVE_VSUBi16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
- def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
- (v4i32 (MVE_VSUBi32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
-}
+defm MVE_VADDi8 : MVE_VADD<MVE_v16i8>;
+defm MVE_VADDi16 : MVE_VADD<MVE_v8i16>;
+defm MVE_VADDi32 : MVE_VADD<MVE_v4i32>;
+
+defm MVE_VSUBi8 : MVE_VSUB<MVE_v16i8>;
+defm MVE_VSUBi16 : MVE_VSUB<MVE_v8i16>;
+defm MVE_VSUBi32 : MVE_VSUB<MVE_v4i32>;
class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
- bits<2> size, ValueType vt>
+ bits<2> size>
: MVE_int<iname, suffix, size, []> {
let Inst{28} = U;
@@ -1535,50 +1759,75 @@ class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract,
let Inst{4} = 0b1;
let Inst{0} = 0b0;
let validForTailPredication = 1;
+}
- ValueType VT = vt;
+class MVE_VQADD_<string suffix, bit U, bits<2> size>
+ : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size>;
+class MVE_VQSUB_<string suffix, bit U, bits<2> size>
+ : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size>;
+
+multiclass MVE_VQADD_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VQADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated saturating add
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated saturating add
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
-class MVE_VQADD<string suffix, bit U, bits<2> size, ValueType VT>
- : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, VT>;
-class MVE_VQSUB<string suffix, bit U, bits<2> size, ValueType VT>
- : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, VT>;
+multiclass MVE_VQADD<MVEVectorVTInfo VTI, SDNode unpred_op>
+ : MVE_VQADD_m<VTI, unpred_op, int_arm_mve_qadd_predicated>;
+
+defm MVE_VQADDs8 : MVE_VQADD<MVE_v16s8, saddsat>;
+defm MVE_VQADDs16 : MVE_VQADD<MVE_v8s16, saddsat>;
+defm MVE_VQADDs32 : MVE_VQADD<MVE_v4s32, saddsat>;
+defm MVE_VQADDu8 : MVE_VQADD<MVE_v16u8, uaddsat>;
+defm MVE_VQADDu16 : MVE_VQADD<MVE_v8u16, uaddsat>;
+defm MVE_VQADDu32 : MVE_VQADD<MVE_v4u32, uaddsat>;
+
+multiclass MVE_VQSUB_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VQSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated saturating subtract
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated saturating subtract
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00, v16i8>;
-def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01, v8i16>;
-def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10, v4i32>;
-def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00, v16i8>;
-def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01, v8i16>;
-def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10, v4i32>;
+multiclass MVE_VQSUB<MVEVectorVTInfo VTI, SDNode unpred_op>
+ : MVE_VQSUB_m<VTI, unpred_op, int_arm_mve_qsub_predicated>;
-def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00, v16i8>;
-def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01, v8i16>;
-def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10, v4i32>;
-def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00, v16i8>;
-def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01, v8i16>;
-def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10, v4i32>;
+defm MVE_VQSUBs8 : MVE_VQSUB<MVE_v16s8, ssubsat>;
+defm MVE_VQSUBs16 : MVE_VQSUB<MVE_v8s16, ssubsat>;
+defm MVE_VQSUBs32 : MVE_VQSUB<MVE_v4s32, ssubsat>;
+defm MVE_VQSUBu8 : MVE_VQSUB<MVE_v16u8, usubsat>;
+defm MVE_VQSUBu16 : MVE_VQSUB<MVE_v8u16, usubsat>;
+defm MVE_VQSUBu32 : MVE_VQSUB<MVE_v4u32, usubsat>;
-let Predicates = [HasMVEInt] in {
- foreach instr = [MVE_VQADDu8, MVE_VQADDu16, MVE_VQADDu32] in
- foreach VT = [instr.VT] in
- def : Pat<(VT (uaddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
- (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
- foreach instr = [MVE_VQADDs8, MVE_VQADDs16, MVE_VQADDs32] in
- foreach VT = [instr.VT] in
- def : Pat<(VT (saddsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
- (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
- foreach instr = [MVE_VQSUBu8, MVE_VQSUBu16, MVE_VQSUBu32] in
- foreach VT = [instr.VT] in
- def : Pat<(VT (usubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
- (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
- foreach instr = [MVE_VQSUBs8, MVE_VQSUBs16, MVE_VQSUBs32] in
- foreach VT = [instr.VT] in
- def : Pat<(VT (ssubsat (VT MQPR:$Qm), (VT MQPR:$Qn))),
- (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>;
-}
-
-
-class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+class MVE_VABD_int<string suffix, bit U, bits<2> size,
+ list<dag> pattern=[]>
: MVE_int<"vabd", suffix, size, pattern> {
let Inst{28} = U;
@@ -1590,14 +1839,38 @@ class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>;
-def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>;
-def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>;
-def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>;
-def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>;
-def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>;
+multiclass MVE_VABD_m<MVEVectorVTInfo VTI,
+ Intrinsic unpred_int, Intrinsic pred_int> {
+ def "" : MVE_VABD_int<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated absolute difference
+ def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated absolute difference
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VABD<MVEVectorVTInfo VTI>
+ : MVE_VABD_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
-class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+defm MVE_VABDs8 : MVE_VABD<MVE_v16s8>;
+defm MVE_VABDs16 : MVE_VABD<MVE_v8s16>;
+defm MVE_VABDs32 : MVE_VABD<MVE_v4s32>;
+defm MVE_VABDu8 : MVE_VABD<MVE_v16u8>;
+defm MVE_VABDu16 : MVE_VABD<MVE_v8u16>;
+defm MVE_VABDu32 : MVE_VABD<MVE_v4u32>;
+
+class MVE_VRHADD_Base<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
: MVE_int<"vrhadd", suffix, size, pattern> {
let Inst{28} = U;
@@ -1609,12 +1882,36 @@ class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VRHADDs8 : MVE_VRHADD<"s8", 0b0, 0b00>;
-def MVE_VRHADDs16 : MVE_VRHADD<"s16", 0b0, 0b01>;
-def MVE_VRHADDs32 : MVE_VRHADD<"s32", 0b0, 0b10>;
-def MVE_VRHADDu8 : MVE_VRHADD<"u8", 0b1, 0b00>;
-def MVE_VRHADDu16 : MVE_VRHADD<"u16", 0b1, 0b01>;
-def MVE_VRHADDu32 : MVE_VRHADD<"u32", 0b1, 0b10>;
+multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated rounding add-with-divide-by-two
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+    // Predicated rounding add-with-divide-by-two
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VRHADD<MVEVectorVTInfo VTI>
+ : MVE_VRHADD_m<VTI, int_arm_mve_vrhadd, int_arm_mve_rhadd_predicated>;
+
+defm MVE_VRHADDs8 : MVE_VRHADD<MVE_v16s8>;
+defm MVE_VRHADDs16 : MVE_VRHADD<MVE_v8s16>;
+defm MVE_VRHADDs32 : MVE_VRHADD<MVE_v4s32>;
+defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>;
+defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
+defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
bits<2> size, list<dag> pattern=[]>
@@ -1631,81 +1928,73 @@ class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
let validForTailPredication = 1;
}
-class MVE_VHADD<string suffix, bit U, bits<2> size,
+class MVE_VHADD_<string suffix, bit U, bits<2> size,
list<dag> pattern=[]>
: MVE_VHADDSUB<"vhadd", suffix, U, 0b0, size, pattern>;
-class MVE_VHSUB<string suffix, bit U, bits<2> size,
+class MVE_VHSUB_<string suffix, bit U, bits<2> size,
list<dag> pattern=[]>
: MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
-def MVE_VHADDs8 : MVE_VHADD<"s8", 0b0, 0b00>;
-def MVE_VHADDs16 : MVE_VHADD<"s16", 0b0, 0b01>;
-def MVE_VHADDs32 : MVE_VHADD<"s32", 0b0, 0b10>;
-def MVE_VHADDu8 : MVE_VHADD<"u8", 0b1, 0b00>;
-def MVE_VHADDu16 : MVE_VHADD<"u16", 0b1, 0b01>;
-def MVE_VHADDu32 : MVE_VHADD<"u32", 0b1, 0b10>;
-
-def MVE_VHSUBs8 : MVE_VHSUB<"s8", 0b0, 0b00>;
-def MVE_VHSUBs16 : MVE_VHSUB<"s16", 0b0, 0b01>;
-def MVE_VHSUBs32 : MVE_VHSUB<"s32", 0b0, 0b10>;
-def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>;
-def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>;
-def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>;
+multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated add-and-divide-by-two
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated add-and-divide-by-two
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (ARMvshrsImm
- (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
- (v16i8 (MVE_VHADDs8
- (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
- def : Pat<(v8i16 (ARMvshrsImm
- (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
- (v8i16 (MVE_VHADDs16
- (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
- def : Pat<(v4i32 (ARMvshrsImm
- (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
- (v4i32 (MVE_VHADDs32
- (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
- def : Pat<(v16i8 (ARMvshruImm
- (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
- (v16i8 (MVE_VHADDu8
- (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
- def : Pat<(v8i16 (ARMvshruImm
- (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
- (v8i16 (MVE_VHADDu16
- (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
- def : Pat<(v4i32 (ARMvshruImm
- (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
- (v4i32 (MVE_VHADDu32
- (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
- def : Pat<(v16i8 (ARMvshrsImm
- (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
- (v16i8 (MVE_VHSUBs8
- (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
- def : Pat<(v8i16 (ARMvshrsImm
- (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
- (v8i16 (MVE_VHSUBs16
- (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
- def : Pat<(v4i32 (ARMvshrsImm
- (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
- (v4i32 (MVE_VHSUBs32
- (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
-
- def : Pat<(v16i8 (ARMvshruImm
- (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)),
- (v16i8 (MVE_VHSUBu8
- (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>;
- def : Pat<(v8i16 (ARMvshruImm
- (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)),
- (v8i16 (MVE_VHSUBu16
- (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>;
- def : Pat<(v4i32 (ARMvshruImm
- (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)),
- (v4i32 (MVE_VHSUBu32
- (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>;
+multiclass MVE_VHADD<MVEVectorVTInfo VTI>
+ : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated>;
+
+defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8>;
+defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16>;
+defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32>;
+defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8>;
+defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16>;
+defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32>;
+
+multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VHSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated subtract-and-divide-by-two
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated subtract-and-divide-by-two
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
+multiclass MVE_VHSUB<MVEVectorVTInfo VTI>
+ : MVE_VHSUB_m<VTI, int_arm_mve_vhsub, int_arm_mve_hsub_predicated>;
+
+defm MVE_VHSUBs8 : MVE_VHSUB<MVE_v16s8>;
+defm MVE_VHSUBs16 : MVE_VHSUB<MVE_v8s16>;
+defm MVE_VHSUBs32 : MVE_VHSUB<MVE_v4s32>;
+defm MVE_VHSUBu8 : MVE_VHSUB<MVE_v16u8>;
+defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16>;
+defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32>;
+
class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
"vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
@@ -1873,6 +2162,49 @@ def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>;
def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
+// int_min/int_max: vectors containing INT_MIN/INT_MAX in every lane
+// zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
+multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
+ dag zero_vec, MVE_VQABSNEG vqabs_instruction,
+ MVE_VQABSNEG vqneg_instruction> {
+ let Predicates = [HasMVEInt] in {
+ // The below tree can be replaced by a vqabs instruction, as it represents
+ // the following vectorized expression (r being the value in $reg):
+ // r > 0 ? r : (r == INT_MIN ? INT_MAX : -r)
+ def : Pat<(VTI.Vec (vselect
+ (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), ARMCCgt)),
+ (VTI.Vec MQPR:$reg),
+ (VTI.Vec (vselect
+ (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)),
+ int_max,
+ (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))))),
+ (VTI.Vec (vqabs_instruction (VTI.Vec MQPR:$reg)))>;
+ // Similarly, this tree represents vqneg, i.e. the following vectorized expression:
+ // r == INT_MIN ? INT_MAX : -r
+ def : Pat<(VTI.Vec (vselect
+ (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)),
+ int_max,
+ (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))),
+ (VTI.Vec (vqneg_instruction (VTI.Vec MQPR:$reg)))>;
+ }
+}
+
+defm MVE_VQABSNEG_Ps8 : vqabsneg_pattern<MVE_v16i8,
+ (v16i8 (ARMvmovImm (i32 3712))),
+ (v16i8 (ARMvmovImm (i32 3711))),
+ (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
+ MVE_VQABSs8, MVE_VQNEGs8>;
+defm MVE_VQABSNEG_Ps16 : vqabsneg_pattern<MVE_v8i16,
+ (v8i16 (ARMvmovImm (i32 2688))),
+ (v8i16 (ARMvmvnImm (i32 2688))),
+ (bitconvert (v4i32 (ARMvmovImm (i32 0)))),
+ MVE_VQABSs16, MVE_VQNEGs16>;
+defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern<MVE_v4i32,
+ (v4i32 (ARMvmovImm (i32 1664))),
+ (v4i32 (ARMvmvnImm (i32 1664))),
+ (ARMvmovImm (i32 0)),
+ MVE_VQABSs32, MVE_VQNEGs32>;
+
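The bare i32 immediates above are modified-immediate encodings of the form (OpCmode << 8) | imm8; decoded (assuming the standard NEON/MVE modified-immediate scheme), each one splats INT_MIN or INT_MAX into every lane:

    // Sketch: decoded values of the modified immediates used above.
    // 3712 = 0xE80 -> cmode 0b1110, imm8 0x80 -> per-i8 splat 0x80   = INT8_MIN
    // 3711 = 0xE7F -> cmode 0b1110, imm8 0x7F -> per-i8 splat 0x7F   = INT8_MAX
    // 2688 = 0xA80 -> cmode 0b1010, imm8 0x80 -> per-i16 0x80 << 8   = INT16_MIN
    //                 (via ARMvmvnImm, ~0x8000 = 0x7FFF)             = INT16_MAX
    // 1664 = 0x680 -> cmode 0b0110, imm8 0x80 -> per-i32 0x80 << 24  = INT32_MIN
    //                 (via ARMvmvnImm, ~0x80000000 = 0x7FFFFFFF)     = INT32_MAX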
class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
dag iops, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
@@ -1956,6 +2288,7 @@ class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
let Inst{4} = 0b0;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b1;
+ let validForTailPredication = 1;
}
def MVE_VMAXAs8 : MVE_VMINMAXA<"vmaxa", "s8", 0b00, 0b0>;
@@ -2049,8 +2382,8 @@ let Predicates = [HasMVEInt] in {
class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
- dag immops, list<dag> pattern=[]>
- : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
+ Operand immtype, list<dag> pattern=[]>
+ : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm),
iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b101;
@@ -2059,6 +2392,9 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
let Inst{11-6} = 0b111101;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+
+ // For the MVE_VSHLL_patterns multiclass to refer to
+ Operand immediateType = immtype;
}
// The immediate VSHLL instructions accept shift counts from 1 up to
@@ -2067,7 +2403,7 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
class MVE_VSHLL_imm8<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, pattern> {
bits<3> imm;
let Inst{20-19} = 0b01;
let Inst{18-16} = imm;
@@ -2075,7 +2411,7 @@ class MVE_VSHLL_imm8<string iname, string suffix,
class MVE_VSHLL_imm16<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, pattern> {
bits<4> imm;
let Inst{20} = 0b1;
let Inst{19-16} = imm;
@@ -2119,11 +2455,50 @@ defm MVE_VSHLL_lws16 : MVE_VSHLL_lw<"vshll", "s16", 0b01, 0b0, "$Qd, $Qm, #16">;
defm MVE_VSHLL_lwu8 : MVE_VSHLL_lw<"vshll", "u8", 0b00, 0b1, "$Qd, $Qm, #8">;
defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
+multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
+ defvar suffix = !strconcat(VTI.Suffix, !if(top, "th", "bh"));
+ defvar inst_imm = !cast<MVE_VSHLL_imm>("MVE_VSHLL_imm" # suffix);
+ defvar inst_lw = !cast<MVE_VSHLL_by_lane_width>("MVE_VSHLL_lw" # suffix);
+ defvar unpred_int = int_arm_mve_vshll_imm;
+ defvar pred_int = int_arm_mve_vshll_imm_predicated;
+ defvar imm = inst_imm.immediateType;
+
+ def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), imm:$imm,
+ (i32 VTI.Unsigned), (i32 top))),
+ (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm))>;
+ def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+ (i32 VTI.Unsigned), (i32 top))),
+ (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src)))>;
+
+ def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), imm:$imm,
+ (i32 VTI.Unsigned), (i32 top),
+ (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+ def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+ (i32 VTI.Unsigned), (i32 top),
+ (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
+ (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+}
+
+foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
+ foreach top = [0, 1] in
+ defm : MVE_VSHLL_patterns<VTI, top>;
+
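The foreach above stamps out eight pattern sets (four vector types, each for bottom and top halves). For MVE_v16s8 with top = 0, the unpredicated immediate-shift pattern resolves roughly to the following (sketch; the instruction name follows the !cast naming scheme in the multiclass):

    // Sketch: unpredicated immediate-form pattern for VTI = MVE_v16s8, top = 0.
    def : Pat<(v8i16 (int_arm_mve_vshll_imm (v16i8 MQPR:$src),
                                            mve_shift_imm1_7:$imm,
                                            (i32 0), (i32 0))),
              (v8i16 (MVE_VSHLL_imms8bh (v16i8 MQPR:$src),
                                        mve_shift_imm1_7:$imm))>;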
+class MVE_shift_imm_partial<Operand imm, string iname, string suffix>
+ : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm),
+ iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> {
+ Operand immediateType = imm;
+}
+
class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
- dag immops, list<dag> pattern=[]>
- : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
- iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
- pattern> {
+ Operand imm, list<dag> pattern=[]>
+ : MVE_shift_imm_partial<imm, iname, suffix> {
bits<5> imm;
let Inst{28} = bit_28;
@@ -2136,45 +2511,35 @@ class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
let Inst{0} = 0b1;
}
-def MVE_VRSHRNi16bh : MVE_VxSHRN<
- "vrshrnb", "i16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
-def MVE_VRSHRNi16th : MVE_VxSHRN<
- "vrshrnt", "i16", 0b1, 0b1,(ins shr_imm8:$imm)> {
+def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
-def MVE_VRSHRNi32bh : MVE_VxSHRN<
- "vrshrnb", "i32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
-def MVE_VRSHRNi32th : MVE_VxSHRN<
- "vrshrnt", "i32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
-def MVE_VSHRNi16bh : MVE_VxSHRN<
- "vshrnb", "i16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
-def MVE_VSHRNi16th : MVE_VxSHRN<
- "vshrnt", "i16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
-def MVE_VSHRNi32bh : MVE_VxSHRN<
- "vshrnb", "i32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
-def MVE_VSHRNi32th : MVE_VxSHRN<
- "vshrnt", "i32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
-class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag immops,
- list<dag> pattern=[]>
- : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
- iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
- pattern> {
+class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
+ Operand imm, list<dag> pattern=[]>
+ : MVE_shift_imm_partial<imm, iname, suffix> {
bits<5> imm;
let Inst{28} = bit_28;
@@ -2188,44 +2553,42 @@ class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12, dag imm
}
def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
- "vqrshrunb", "s16", 0b1, 0b0, (ins shr_imm8:$imm)> {
+ "vqrshrunb", "s16", 0b1, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
- "vqrshrunt", "s16", 0b1, 0b1, (ins shr_imm8:$imm)> {
+ "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
- "vqrshrunb", "s32", 0b1, 0b0, (ins shr_imm16:$imm)> {
+ "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
- "vqrshrunt", "s32", 0b1, 0b1, (ins shr_imm16:$imm)> {
+ "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
- "vqshrunb", "s16", 0b0, 0b0, (ins shr_imm8:$imm)> {
+ "vqshrunb", "s16", 0b0, 0b0, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
- "vqshrunt", "s16", 0b0, 0b1, (ins shr_imm8:$imm)> {
+ "vqshrunt", "s16", 0b0, 0b1, shr_imm8> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
- "vqshrunb", "s32", 0b0, 0b0, (ins shr_imm16:$imm)> {
+ "vqshrunb", "s32", 0b0, 0b0, shr_imm16> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
- "vqshrunt", "s32", 0b0, 0b1, (ins shr_imm16:$imm)> {
+ "vqshrunt", "s32", 0b0, 0b1, shr_imm16> {
let Inst{20} = 0b1;
}
class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
- dag immops, list<dag> pattern=[]>
- : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
- iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc",
- pattern> {
+ Operand imm, list<dag> pattern=[]>
+ : MVE_shift_imm_partial<imm, iname, suffix> {
bits<5> imm;
let Inst{25-23} = 0b101;
@@ -2238,19 +2601,19 @@ class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
}
multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
- def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+ def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8> {
let Inst{28} = 0b0;
let Inst{20-19} = 0b01;
}
- def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, (ins shr_imm8:$imm)> {
+ def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8> {
let Inst{28} = 0b1;
let Inst{20-19} = 0b01;
}
- def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+ def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16> {
let Inst{28} = 0b0;
let Inst{20} = 0b1;
}
- def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, (ins shr_imm16:$imm)> {
+ def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, shr_imm16> {
let Inst{28} = 0b1;
let Inst{20} = 0b1;
}
@@ -2261,6 +2624,63 @@ defm MVE_VQRSHRNth : MVE_VxQRSHRN_types<"vqrshrnt", 0b1, 0b1>;
defm MVE_VQSHRNbh : MVE_VxQRSHRN_types<"vqshrnb", 0b0, 0b0>;
defm MVE_VQSHRNth : MVE_VxQRSHRN_types<"vqshrnt", 0b0, 0b1>;
+multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
+ MVEVectorVTInfo OutVTI, MVEVectorVTInfo InVTI,
+ bit q, bit r, bit top> {
+ defvar inparams = (? (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
+ (inst.immediateType:$imm), (i32 q), (i32 r),
+ (i32 OutVTI.Unsigned), (i32 InVTI.Unsigned), (i32 top));
+ defvar outparams = (inst (OutVTI.Vec MQPR:$QdSrc), (InVTI.Vec MQPR:$Qm),
+ (imm:$imm));
+
+ def : Pat<(OutVTI.Vec !setop(inparams, int_arm_mve_vshrn)),
+ (OutVTI.Vec outparams)>;
+ def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
+ (InVTI.Pred VCCR:$pred)))),
+ (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+}
+
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16th, MVE_v16s8, MVE_v8s16, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh, MVE_v8s16, MVE_v4s32, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32th, MVE_v8s16, MVE_v4s32, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16u8, MVE_v8u16, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi16th, MVE_v16u8, MVE_v8u16, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32bh, MVE_v8u16, MVE_v4u32, 0,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VSHRNi32th, MVE_v8u16, MVE_v4u32, 0,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th, MVE_v16s8, MVE_v8s16, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh, MVE_v8s16, MVE_v4s32, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th, MVE_v8s16, MVE_v4s32, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16bh, MVE_v16u8, MVE_v8u16, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi16th, MVE_v16u8, MVE_v8u16, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32bh, MVE_v8u16, MVE_v4u32, 0,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VRSHRNi32th, MVE_v8u16, MVE_v4u32, 0,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs16, MVE_v16s8, MVE_v8s16, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNths16, MVE_v16s8, MVE_v8s16, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhs32, MVE_v8s16, MVE_v4s32, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNths32, MVE_v8s16, MVE_v4s32, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu16, MVE_v16u8, MVE_v8u16, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNthu16, MVE_v16u8, MVE_v8u16, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNbhu32, MVE_v8u16, MVE_v4u32, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRNthu32, MVE_v8u16, MVE_v4u32, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs16, MVE_v16s8, MVE_v8s16, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNths16, MVE_v16s8, MVE_v8s16, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhs32, MVE_v8s16, MVE_v4s32, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNths32, MVE_v8s16, MVE_v4s32, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu16, MVE_v16u8, MVE_v8u16, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu16, MVE_v16u8, MVE_v8u16, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNbhu32, MVE_v8u16, MVE_v4u32, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRNthu32, MVE_v8u16, MVE_v4u32, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,0,0>;
+defm : MVE_VSHRN_patterns<MVE_VQSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,0,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16bh, MVE_v16u8, MVE_v8s16, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs16th, MVE_v16u8, MVE_v8s16, 1,1,1>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32bh, MVE_v8u16, MVE_v4s32, 1,1,0>;
+defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,1,1>;
+
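+// An illustrative, hand-derived sketch (approximate, not TableGen output):
+// the first instantiation above,
+//   defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,0,0>;
+// should yield an unpredicated pattern along the lines of
+//   def : Pat<(v16i8 (int_arm_mve_vshrn
+//                      (v16i8 MQPR:$QdSrc), (v8i16 MQPR:$Qm), shr_imm8:$imm,
+//                      (i32 0), (i32 0), (i32 0), (i32 0), (i32 0))),
+//             (v16i8 (MVE_VSHRNi16bh (v16i8 MQPR:$QdSrc), (v8i16 MQPR:$Qm),
+//                      imm:$imm))>;
+// plus the matching predicated pattern, which appends (v8i1 VCCR:$pred) to
+// the intrinsic call and ARMVCCThen, VCCR:$pred to the instruction.
+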
// end of mve_imm_shift instructions
// start of mve_shift instructions
@@ -2293,13 +2713,31 @@ class MVE_shift_by_vec<string iname, string suffix, bit U,
let validForTailPredication = 1;
}
+multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
+ def "" : MVE_shift_by_vec<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_vector
+ (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh)))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_vector_predicated
+ (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
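+// A hand-expanded sketch (approximate): for MVE_v16s8 with q = r = 0, the
+// unpredicated pattern above becomes roughly
+//   def : Pat<(v16i8 (int_arm_mve_vshl_vector (v16i8 MQPR:$in),
+//                      (v16i8 MQPR:$sh), (i32 0), (i32 0), (i32 0))),
+//             (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$in),
+//                      (v16i8 MQPR:$sh)))>;
+// so a single intrinsic covers the whole vshl family, with the q/r flags
+// selecting the saturating and rounding variants.
+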
multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> {
- def s8 : MVE_shift_by_vec<iname, "s8", 0b0, 0b00, bit_4, bit_8>;
- def s16 : MVE_shift_by_vec<iname, "s16", 0b0, 0b01, bit_4, bit_8>;
- def s32 : MVE_shift_by_vec<iname, "s32", 0b0, 0b10, bit_4, bit_8>;
- def u8 : MVE_shift_by_vec<iname, "u8", 0b1, 0b00, bit_4, bit_8>;
- def u16 : MVE_shift_by_vec<iname, "u16", 0b1, 0b01, bit_4, bit_8>;
- def u32 : MVE_shift_by_vec<iname, "u32", 0b1, 0b10, bit_4, bit_8>;
+ defm s8 : MVE_shift_by_vec_p<iname, MVE_v16s8, bit_4, bit_8>;
+ defm s16 : MVE_shift_by_vec_p<iname, MVE_v8s16, bit_4, bit_8>;
+ defm s32 : MVE_shift_by_vec_p<iname, MVE_v4s32, bit_4, bit_8>;
+ defm u8 : MVE_shift_by_vec_p<iname, MVE_v16u8, bit_4, bit_8>;
+ defm u16 : MVE_shift_by_vec_p<iname, MVE_v8u16, bit_4, bit_8>;
+ defm u32 : MVE_shift_by_vec_p<iname, MVE_v4u32, bit_4, bit_8>;
}
defm MVE_VSHL_by_vec : mve_shift_by_vec_multi<"vshl", 0b0, 0b0>;
@@ -2340,11 +2778,18 @@ class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
let validForTailPredication = 1;
+
+  // Fields for the MVE_shift_imm_patterns multiclass to refer to.
+ MVEVectorVTInfo VTI;
+ Operand immediateType;
+ Intrinsic unpred_int;
+ Intrinsic pred_int;
+ dag unsignedFlag = (?);
}
-class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
+class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
: MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qd_src, MQPR:$Qm), imm),
+ (ins MQPR:$Qd_src, MQPR:$Qm, immType:$imm),
"$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
bits<6> imm;
let Inst{28} = 0b1;
@@ -2353,76 +2798,99 @@ class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
let Inst{10-9} = 0b10;
let Inst{8} = bit_8;
let validForTailPredication = 1;
+
+ Operand immediateType = immType;
}
-def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
+def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> {
let Inst{21-19} = 0b001;
}
-def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, (ins shr_imm16:$imm)> {
+def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> {
let Inst{21-20} = 0b01;
}
-def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, (ins shr_imm32:$imm)> {
+def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> {
let Inst{21} = 0b1;
}
-def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, (ins imm0_7:$imm)> {
+def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> {
let Inst{21-19} = 0b001;
}
-def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, (ins imm0_15:$imm)> {
+def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> {
let Inst{21-20} = 0b01;
}
-def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,(ins imm0_31:$imm)> {
+def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1, imm0_31> {
let Inst{21} = 0b1;
}
-class MVE_VQSHL_imm<string suffix, dag imm>
- : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
+ MVEVectorVTInfo VTI> {
+ defvar inparams = (? (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
+ (inst.immediateType:$imm));
+ defvar outparams = (inst (VTI.Vec MQPR:$QdSrc), (VTI.Vec MQPR:$Qm),
+ (inst.immediateType:$imm));
+ defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # name);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # name # "_predicated");
+
+ def : Pat<(VTI.Vec !setop(inparams, unpred_int)),
+ (VTI.Vec outparams)>;
+ def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
+ (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+}
+
+defm : MVE_VSxI_patterns<MVE_VSLIimm8, "vsli", MVE_v16i8>;
+defm : MVE_VSxI_patterns<MVE_VSLIimm16, "vsli", MVE_v8i16>;
+defm : MVE_VSxI_patterns<MVE_VSLIimm32, "vsli", MVE_v4i32>;
+defm : MVE_VSxI_patterns<MVE_VSRIimm8, "vsri", MVE_v16i8>;
+defm : MVE_VSxI_patterns<MVE_VSRIimm16, "vsri", MVE_v8i16>;
+defm : MVE_VSxI_patterns<MVE_VSRIimm32, "vsri", MVE_v4i32>;
+
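+// Approximate expansion of the first instantiation (the intrinsic name
+// int_arm_mve_vsli is assembled by the !cast in the multiclass):
+//   def : Pat<(v16i8 (int_arm_mve_vsli (v16i8 MQPR:$QdSrc),
+//                      (v16i8 MQPR:$Qm), imm0_7:$imm)),
+//             (v16i8 (MVE_VSLIimm8 (v16i8 MQPR:$QdSrc), (v16i8 MQPR:$Qm),
+//                      imm0_7:$imm))>;
+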
+class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
+ : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
vpred_r, ""> {
bits<6> imm;
+ let Inst{28} = VTI_.Unsigned;
let Inst{25-24} = 0b11;
let Inst{21-16} = imm;
let Inst{10-8} = 0b111;
-}
-
-def MVE_VSLIimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-19} = 0b001;
-}
-
-def MVE_VSLIimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-19} = 0b001;
-}
-def MVE_VSLIimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-20} = 0b01;
+ let VTI = VTI_;
+ let immediateType = immType;
+ let unsignedFlag = (? (i32 VTI.Unsigned));
}
-def MVE_VSLIimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-20} = 0b01;
-}
-
-def MVE_VSLIimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21} = 0b1;
-}
-
-def MVE_VSLIimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21} = 0b1;
+let unpred_int = int_arm_mve_vqshl_imm,
+ pred_int = int_arm_mve_vqshl_imm_predicated in {
+ def MVE_VQSHLimms8 : MVE_VQSHL_imm<MVE_v16s8, imm0_7> {
+ let Inst{21-19} = 0b001;
+ }
+ def MVE_VQSHLimmu8 : MVE_VQSHL_imm<MVE_v16u8, imm0_7> {
+ let Inst{21-19} = 0b001;
+ }
+
+ def MVE_VQSHLimms16 : MVE_VQSHL_imm<MVE_v8s16, imm0_15> {
+ let Inst{21-20} = 0b01;
+ }
+ def MVE_VQSHLimmu16 : MVE_VQSHL_imm<MVE_v8u16, imm0_15> {
+ let Inst{21-20} = 0b01;
+ }
+
+ def MVE_VQSHLimms32 : MVE_VQSHL_imm<MVE_v4s32, imm0_31> {
+ let Inst{21} = 0b1;
+ }
+ def MVE_VQSHLimmu32 : MVE_VQSHL_imm<MVE_v4u32, imm0_31> {
+ let Inst{21} = 0b1;
+ }
}
-class MVE_VQSHLU_imm<string suffix, dag imm>
- : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VQSHLU_imm<MVEVectorVTInfo VTI_, Operand immType>
+ : MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
vpred_r, ""> {
bits<6> imm;
@@ -2430,61 +2898,103 @@ class MVE_VQSHLU_imm<string suffix, dag imm>
let Inst{25-24} = 0b11;
let Inst{21-16} = imm;
let Inst{10-8} = 0b110;
-}
-def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
- let Inst{21-19} = 0b001;
+ let VTI = VTI_;
+ let immediateType = immType;
}
-def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
- let Inst{21-20} = 0b01;
-}
+let unpred_int = int_arm_mve_vqshlu_imm,
+ pred_int = int_arm_mve_vqshlu_imm_predicated in {
+ def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<MVE_v16s8, imm0_7> {
+ let Inst{21-19} = 0b001;
+ }
-def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
- let Inst{21} = 0b1;
+ def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<MVE_v8s16, imm0_15> {
+ let Inst{21-20} = 0b01;
+ }
+
+ def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<MVE_v4s32, imm0_31> {
+ let Inst{21} = 0b1;
+ }
}
-class MVE_VRSHR_imm<string suffix, dag imm>
- : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
- !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VRSHR_imm<MVEVectorVTInfo VTI_, Operand immType>
+ : MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd),
+ (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
vpred_r, ""> {
bits<6> imm;
+ let Inst{28} = VTI_.Unsigned;
let Inst{25-24} = 0b11;
let Inst{21-16} = imm;
let Inst{10-8} = 0b010;
-}
-def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-19} = 0b001;
+ let VTI = VTI_;
+ let immediateType = immType;
+ let unsignedFlag = (? (i32 VTI.Unsigned));
}
-def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-19} = 0b001;
-}
+let unpred_int = int_arm_mve_vrshr_imm,
+ pred_int = int_arm_mve_vrshr_imm_predicated in {
+ def MVE_VRSHR_imms8 : MVE_VRSHR_imm<MVE_v16s8, shr_imm8> {
+ let Inst{21-19} = 0b001;
+ }
-def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21-20} = 0b01;
-}
+ def MVE_VRSHR_immu8 : MVE_VRSHR_imm<MVE_v16u8, shr_imm8> {
+ let Inst{21-19} = 0b001;
+ }
-def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21-20} = 0b01;
-}
+ def MVE_VRSHR_imms16 : MVE_VRSHR_imm<MVE_v8s16, shr_imm16> {
+ let Inst{21-20} = 0b01;
+ }
-def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
- let Inst{28} = 0b0;
- let Inst{21} = 0b1;
-}
+ def MVE_VRSHR_immu16 : MVE_VRSHR_imm<MVE_v8u16, shr_imm16> {
+ let Inst{21-20} = 0b01;
+ }
-def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
- let Inst{28} = 0b1;
- let Inst{21} = 0b1;
+ def MVE_VRSHR_imms32 : MVE_VRSHR_imm<MVE_v4s32, shr_imm32> {
+ let Inst{21} = 0b1;
+ }
+
+ def MVE_VRSHR_immu32 : MVE_VRSHR_imm<MVE_v4u32, shr_imm32> {
+ let Inst{21} = 0b1;
+ }
}
+multiclass MVE_shift_imm_patterns<MVE_shift_with_imm inst> {
+ def : Pat<(inst.VTI.Vec !con((inst.unpred_int (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm),
+ inst.unsignedFlag)),
+ (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm))>;
+
+ def : Pat<(inst.VTI.Vec !con((inst.pred_int (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm),
+ inst.unsignedFlag,
+ (? (inst.VTI.Pred VCCR:$mask),
+ (inst.VTI.Vec MQPR:$inactive)))),
+ (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
+ inst.immediateType:$imm,
+ ARMVCCThen, (inst.VTI.Pred VCCR:$mask),
+ (inst.VTI.Vec MQPR:$inactive)))>;
+}
+
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu32>;
+
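+// Approximate hand expansion: for MVE_VQSHLimms8, whose unsignedFlag is
+// (? (i32 0)), the unpredicated pattern comes out as roughly
+//   def : Pat<(v16i8 (int_arm_mve_vqshl_imm (v16i8 MQPR:$src),
+//                      imm0_7:$imm, (i32 0))),
+//             (v16i8 (MVE_VQSHLimms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+// The VQSHLU instructions keep unsignedFlag at its empty default, so their
+// intrinsic calls carry no signedness operand.
+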
class MVE_VSHR_imm<string suffix, dag imm>
: MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
!con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
@@ -2550,27 +3060,39 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
let Inst{21} = 0b1;
}
+multiclass MVE_immediate_shift_patterns_inner<
+ MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op,
+ Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> {
+
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)),
+ (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>;
+
+ def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm),
+ !dag(pred_int, unsignedFlag, ?),
+ (pred_int (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))),
+ (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
+multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI,
+ Operand imm_operand_type> {
+ defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+ ARMvshlImm, int_arm_mve_shl_imm_predicated,
+ !cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>;
+ defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+ ARMvshruImm, int_arm_mve_shr_imm_predicated,
+ !cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>;
+ defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
+ ARMvshrsImm, int_arm_mve_shr_imm_predicated,
+ !cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>;
+}
+
let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
- (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
- def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
- (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
- def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
- (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
-
- def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
- (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
- def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
- (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
- def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
- (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
-
- def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
- (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
- def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
- (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
- def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
- (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
+ defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>;
+ defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>;
+ defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>;
}
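+// Approximate hand expansion of one predicated case: for the unsigned
+// 8-bit right shift, unsignedFlag = [1] makes !dag(pred_int, unsignedFlag, ?)
+// contribute a bare 1, giving roughly
+//   def : Pat<(v16i8 (int_arm_mve_shr_imm_predicated
+//                      (v16i8 MQPR:$src), imm0_7:$imm, 1,
+//                      (v16i1 VCCR:$mask), (v16i8 MQPR:$inactive))),
+//             (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm,
+//                      ARMVCCThen, (v16i1 VCCR:$mask),
+//                      (v16i8 MQPR:$inactive)))>;
+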
// end of mve_shift instructions
@@ -2652,8 +3174,8 @@ class MVEFloatArithNeon<string iname, string suffix, bit size,
let Inst{16} = 0b0;
}
-class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]>
- : MVEFloatArithNeon<"vmul", suffix, size, (outs MQPR:$Qd),
+class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
+ : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
pattern> {
bits<4> Qd;
@@ -2671,20 +3193,32 @@ class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>;
-def MVE_VMULf16 : MVE_VMUL_fp<"f16", 0b1>;
+multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;
+ defvar Inst = !cast<Instruction>(NAME);
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (fmul (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
- (v4f32 (MVE_VMULf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
- def : Pat<(v8f16 (fmul (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
- (v8f16 (MVE_VMULf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
-class MVE_VCMLA<string suffix, bit size, list<dag> pattern=[]>
+multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
+ : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>;
+
+defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
+defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
+
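+// For reference, the f32 instance regenerates patterns equivalent to the
+// ones it replaces, approximately
+//   def : Pat<(v4f32 (fmul (v4f32 MQPR:$Qm), (v4f32 MQPR:$Qn))),
+//             (v4f32 (MVE_VMULf32 (v4f32 MQPR:$Qm), (v4f32 MQPR:$Qn)))>;
+// together with the int_arm_mve_mul_predicated form carrying ARMVCCThen,
+// the mask and the inactive-lanes vector.
+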
+class MVE_VCMLA<string suffix, bit size>
: MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", []> {
bits<4> Qd;
bits<4> Qn;
bits<2> rot;
@@ -2701,8 +3235,31 @@ class MVE_VCMLA<string suffix, bit size, list<dag> pattern=[]>
let Inst{4} = 0b0;
}
-def MVE_VCMLAf16 : MVE_VCMLA<"f16", 0b0>;
-def MVE_VCMLAf32 : MVE_VCMLA<"f32", 0b1>;
+multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, bit size> {
+ def "" : MVE_VCMLA<VTI.Suffix, size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (int_arm_mve_vcmlaq
+ imm:$rot, (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vcmlaq_predicated
+ imm:$rot, (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qn),
+ (VTI.Vec MQPR:$Qm), imm:$rot,
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+
+ }
+}
+
+defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16, 0b0>;
+defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, 0b1>;
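+
+// Sketch of the unpredicated f16 pattern this generates (approximate):
+//   def : Pat<(v8f16 (int_arm_mve_vcmlaq imm:$rot, (v8f16 MQPR:$Qd_src),
+//                      (v8f16 MQPR:$Qn), (v8f16 MQPR:$Qm))),
+//             (v8f16 (MVE_VCMLAf16 (v8f16 MQPR:$Qd_src), (v8f16 MQPR:$Qn),
+//                      (v8f16 MQPR:$Qm), imm:$rot))>;
+// Note that the rotation moves from the front of the intrinsic's operand
+// list to the back of the instruction's.
+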
class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
bit bit_8, bit bit_21, dag iops=(ins),
@@ -2736,63 +3293,50 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
(ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-let Predicates = [HasMVEFloat, UseFusedMAC] in {
- def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1),
- (fmul (v8f16 MQPR:$src2),
- (v8f16 MQPR:$src3)))),
- (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>;
- def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1),
- (fmul (v4f32 MQPR:$src2),
- (v4f32 MQPR:$src3)))),
- (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>;
-
- def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1),
- (fmul (v8f16 MQPR:$src2),
- (v8f16 MQPR:$src3)))),
- (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>;
- def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1),
- (fmul (v4f32 MQPR:$src2),
- (v4f32 MQPR:$src3)))),
- (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>;
-}
-
let Predicates = [HasMVEFloat] in {
def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
(v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
(v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
+ def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
+ (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
+ def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
+ (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
}
-
-let validForTailPredication = 1 in {
- def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
- def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
-}
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
- (v4f32 (MVE_VADDf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
- def : Pat<(v8f16 (fadd (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
- (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
+ let validForTailPredication = 1;
+ }
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
}
+multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated>;
+multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated>;
-let validForTailPredication = 1 in {
- def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
- def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
-}
+defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>;
+defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
- (v4f32 (MVE_VSUBf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
- def : Pat<(v8f16 (fsub (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
- (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
-}
+defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
+defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
-class MVE_VCADD<string suffix, bit size, string cstr="", list<dag> pattern=[]>
+class MVE_VCADD<string suffix, bit size, string cstr="">
: MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
bits<4> Qd;
bits<4> Qn;
bit rot;
@@ -2810,8 +3354,29 @@ class MVE_VCADD<string suffix, bit size, string cstr="", list<dag> pattern=[]>
let Inst{4} = 0b0;
}
-def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>;
-def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, bit size, string cstr=""> {
+ def "" : MVE_VCADD<VTI.Suffix, size, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (int_arm_mve_vcaddq (i32 1),
+ imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated (i32 1),
+ imm:$rot, (VTI.Vec MQPR:$inactive),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ }
+}
+
+defm MVE_VCADDf16 : MVE_VCADD_m<MVE_v8f16, 0b0>;
+defm MVE_VCADDf32 : MVE_VCADD_m<MVE_v4f32, 0b1, "@earlyclobber $Qd">;
class MVE_VABD_fp<string suffix, bit size>
: MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
@@ -2833,8 +3398,29 @@ class MVE_VABD_fp<string suffix, bit size>
let validForTailPredication = 1;
}
-def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>;
-def MVE_VABDf16 : MVE_VABD_fp<"f16", 0b1>;
+multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
+ Intrinsic unpred_int, Intrinsic pred_int> {
+ def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size{0}>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 0))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 0), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
+ : MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
+
+defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
+defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
Operand imm_operand_type, list<dag> pattern=[]>
@@ -3186,120 +3772,120 @@ def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>;
def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
-multiclass unpred_vcmp_z<string suffix, int fc> {
- def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))),
+multiclass unpred_vcmp_z<string suffix, PatLeaf fc> {
+ def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
- def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))),
+ def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
- def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))),
+ def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
- def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
}
-multiclass unpred_vcmp_r<string suffix, int fc> {
- def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))),
+multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
+ def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
- def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))),
+ def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
- def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))),
+ def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
- def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))),
+ def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))),
+ def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))),
+ def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
}
-multiclass unpred_vcmpf_z<int fc> {
- def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))),
+multiclass unpred_vcmpf_z<PatLeaf fc> {
+ def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
- def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))),
+ def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))),
- (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))),
+          (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
}
-multiclass unpred_vcmpf_r<int fc> {
+multiclass unpred_vcmpf_r<PatLeaf fc> {
- def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))),
+ def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)),
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
- def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))),
+ def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)),
(v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
- def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))),
+ def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>;
- def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))),
+ def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))),
- (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))),
- (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))),
+ (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))),
+ (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
}
let Predicates = [HasMVEInt] in {
- defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>;
- defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>;
- defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>;
- defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>;
- defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>;
- defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>;
- defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>;
- defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>;
-
- defm MVE_VCEQ : unpred_vcmp_r<"i", 0>;
- defm MVE_VCNE : unpred_vcmp_r<"i", 1>;
- defm MVE_VCGE : unpred_vcmp_r<"s", 10>;
- defm MVE_VCLT : unpred_vcmp_r<"s", 11>;
- defm MVE_VCGT : unpred_vcmp_r<"s", 12>;
- defm MVE_VCLE : unpred_vcmp_r<"s", 13>;
- defm MVE_VCGTU : unpred_vcmp_r<"u", 8>;
- defm MVE_VCGEU : unpred_vcmp_r<"u", 2>;
+ defm MVE_VCEQZ : unpred_vcmp_z<"i", ARMCCeq>;
+ defm MVE_VCNEZ : unpred_vcmp_z<"i", ARMCCne>;
+ defm MVE_VCGEZ : unpred_vcmp_z<"s", ARMCCge>;
+ defm MVE_VCLTZ : unpred_vcmp_z<"s", ARMCClt>;
+ defm MVE_VCGTZ : unpred_vcmp_z<"s", ARMCCgt>;
+ defm MVE_VCLEZ : unpred_vcmp_z<"s", ARMCCle>;
+ defm MVE_VCGTUZ : unpred_vcmp_z<"u", ARMCChi>;
+ defm MVE_VCGEUZ : unpred_vcmp_z<"u", ARMCChs>;
+
+ defm MVE_VCEQ : unpred_vcmp_r<"i", ARMCCeq>;
+ defm MVE_VCNE : unpred_vcmp_r<"i", ARMCCne>;
+ defm MVE_VCGE : unpred_vcmp_r<"s", ARMCCge>;
+ defm MVE_VCLT : unpred_vcmp_r<"s", ARMCClt>;
+ defm MVE_VCGT : unpred_vcmp_r<"s", ARMCCgt>;
+ defm MVE_VCLE : unpred_vcmp_r<"s", ARMCCle>;
+ defm MVE_VCGTU : unpred_vcmp_r<"u", ARMCChi>;
+ defm MVE_VCGEU : unpred_vcmp_r<"u", ARMCChs>;
}
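+// For reference, read off the replaced defm lines above: the PatLeaf
+// condition codes stand for the former integer immediates
+//   ARMCCeq = 0, ARMCCne = 1, ARMCChs = 2, ARMCChi = 8,
+//   ARMCCge = 10, ARMCClt = 11, ARMCCgt = 12, ARMCCle = 13.
+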
let Predicates = [HasMVEFloat] in {
- defm MVE_VFCEQZ : unpred_vcmpf_z<0>;
- defm MVE_VFCNEZ : unpred_vcmpf_z<1>;
- defm MVE_VFCGEZ : unpred_vcmpf_z<10>;
- defm MVE_VFCLTZ : unpred_vcmpf_z<11>;
- defm MVE_VFCGTZ : unpred_vcmpf_z<12>;
- defm MVE_VFCLEZ : unpred_vcmpf_z<13>;
+ defm MVE_VFCEQZ : unpred_vcmpf_z<ARMCCeq>;
+ defm MVE_VFCNEZ : unpred_vcmpf_z<ARMCCne>;
+ defm MVE_VFCGEZ : unpred_vcmpf_z<ARMCCge>;
+ defm MVE_VFCLTZ : unpred_vcmpf_z<ARMCClt>;
+ defm MVE_VFCGTZ : unpred_vcmpf_z<ARMCCgt>;
+ defm MVE_VFCLEZ : unpred_vcmpf_z<ARMCCle>;
- defm MVE_VFCEQ : unpred_vcmpf_r<0>;
- defm MVE_VFCNE : unpred_vcmpf_r<1>;
- defm MVE_VFCGE : unpred_vcmpf_r<10>;
- defm MVE_VFCLT : unpred_vcmpf_r<11>;
- defm MVE_VFCGT : unpred_vcmpf_r<12>;
- defm MVE_VFCLE : unpred_vcmpf_r<13>;
+ defm MVE_VFCEQ : unpred_vcmpf_r<ARMCCeq>;
+ defm MVE_VFCNE : unpred_vcmpf_r<ARMCCne>;
+ defm MVE_VFCGE : unpred_vcmpf_r<ARMCCge>;
+ defm MVE_VFCLT : unpred_vcmpf_r<ARMCClt>;
+ defm MVE_VFCGT : unpred_vcmpf_r<ARMCCgt>;
+ defm MVE_VFCLE : unpred_vcmpf_r<ARMCCle>;
}
@@ -3403,10 +3989,10 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
-class MVE_VCMUL<string iname, string suffix, bit size, string cstr="", list<dag> pattern=[]>
+class MVE_VCMUL<string iname, string suffix, bit size, string cstr="">
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
bits<4> Qn;
bits<2> rot;
@@ -3422,8 +4008,30 @@ class MVE_VCMUL<string iname, string suffix, bit size, string cstr="", list<dag>
let Predicates = [HasMVEFloat];
}
-def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>;
-def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VCMUL_m<string iname, MVEVectorVTInfo VTI,
+ bit size, string cstr=""> {
+ def "" : MVE_VCMUL<iname, VTI.Suffix, size, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (int_arm_mve_vcmulq
+ imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vcmulq_predicated
+ imm:$rot, (VTI.Vec MQPR:$inactive),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ }
+}
+
+defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16, 0b0>;
+defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, 0b1, "@earlyclobber $Qd">;
class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
bit T, string cstr, list<dag> pattern=[]>
@@ -3442,29 +4050,80 @@ class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
let Inst{8} = 0b0;
let Inst{7} = Qn{3};
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
}
-multiclass MVE_VMULL_multi<string iname, string suffix,
- bit bit_28, bits<2> bits_21_20, string cstr=""> {
- def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0, cstr>;
- def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1, cstr>;
+multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int,
+ bit Top, string cstr=""> {
+ def "" : MVE_VMULL<"vmull" # !if(Top, "t", "b"), VTI.Suffix, VTI.Unsigned,
+ VTI.Size, Top, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ defvar uflag = !if(!eq(VTI.SuffixLetter, "p"), (?), (? (i32 VTI.Unsigned)));
+
+ // Unpredicated multiply
+ def : Pat<(VTI.DblVec !con((unpred_op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec MQPR:$Qn)),
+ uflag, (? (i32 Top)))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated multiply
+ def : Pat<(VTI.DblVec !con((pred_int (VTI.Vec MQPR:$Qm),
+ (VTI.Vec MQPR:$Qn)),
+ uflag, (? (i32 Top), (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+ }
}
-// For integer multiplies, bits 21:20 encode size, and bit 28 signedness.
-// For polynomial multiplies, bits 21:20 take the unused value 0b11, and
-// bit 28 switches to encoding the size.
-
-defm MVE_VMULLs8 : MVE_VMULL_multi<"vmull", "s8", 0b0, 0b00>;
-defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>;
-defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10, "@earlyclobber $Qd">;
-defm MVE_VMULLu8 : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>;
-defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>;
-defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Qd">;
-defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>;
-defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>;
-
-class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size,
- bit round, list<dag> pattern=[]>
+// For polynomial multiplies, the size bits take the unused value 0b11, and
+// the unsigned bit switches to encoding the size.
+
+defm MVE_VMULLBs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0,
+ "@earlyclobber $Qd">;
+defm MVE_VMULLTs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1,
+ "@earlyclobber $Qd">;
+
+defm MVE_VMULLBu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0>;
+defm MVE_VMULLTu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1>;
+defm MVE_VMULLBu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b0,
+ "@earlyclobber $Qd">;
+defm MVE_VMULLTu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
+ int_arm_mve_mull_int_predicated, 0b1,
+ "@earlyclobber $Qd">;
+
+defm MVE_VMULLBp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
+ int_arm_mve_mull_poly_predicated, 0b0>;
+defm MVE_VMULLTp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
+ int_arm_mve_mull_poly_predicated, 0b1>;
+defm MVE_VMULLBp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
+ int_arm_mve_mull_poly_predicated, 0b0>;
+defm MVE_VMULLTp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
+ int_arm_mve_mull_poly_predicated, 0b1>;
+
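+// Approximate hand expansion of the integer case: MVE_VMULLBs8 yields
+//   def : Pat<(v8i16 (int_arm_mve_vmull (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn),
+//                      (i32 0) /* unsigned */, (i32 0) /* top */)),
+//             (v8i16 (MVE_VMULLBs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+// In the polynomial forms uflag is empty, so int_arm_mve_vmull_poly takes
+// only the top/bottom flag after the two vector operands.
+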
+class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size, bit round,
+ list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
vpred_r, "", pattern> {
@@ -3480,19 +4139,46 @@ class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size,
let Inst{0} = 0b1;
}
-def MVE_VMULHs8 : MVE_VxMULH<"vmulh", "s8", 0b0, 0b00, 0b0>;
-def MVE_VMULHs16 : MVE_VxMULH<"vmulh", "s16", 0b0, 0b01, 0b0>;
-def MVE_VMULHs32 : MVE_VxMULH<"vmulh", "s32", 0b0, 0b10, 0b0>;
-def MVE_VMULHu8 : MVE_VxMULH<"vmulh", "u8", 0b1, 0b00, 0b0>;
-def MVE_VMULHu16 : MVE_VxMULH<"vmulh", "u16", 0b1, 0b01, 0b0>;
-def MVE_VMULHu32 : MVE_VxMULH<"vmulh", "u32", 0b1, 0b10, 0b0>;
+multiclass MVE_VxMULH_m<string iname, MVEVectorVTInfo VTI, SDNode unpred_op,
+ Intrinsic pred_int, bit round> {
+ def "" : MVE_VxMULH<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, round>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated multiply returning high bits
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+
+ // Predicated multiply returning high bits
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VMULT<string iname, MVEVectorVTInfo VTI, bit round>
+ : MVE_VxMULH_m<iname, VTI, !if(round, int_arm_mve_vrmulh, int_arm_mve_vmulh),
+ !if(round, int_arm_mve_rmulh_predicated,
+ int_arm_mve_mulh_predicated),
+ round>;
+
+defm MVE_VMULHs8 : MVE_VMULT<"vmulh", MVE_v16s8, 0b0>;
+defm MVE_VMULHs16 : MVE_VMULT<"vmulh", MVE_v8s16, 0b0>;
+defm MVE_VMULHs32 : MVE_VMULT<"vmulh", MVE_v4s32, 0b0>;
+defm MVE_VMULHu8 : MVE_VMULT<"vmulh", MVE_v16u8, 0b0>;
+defm MVE_VMULHu16 : MVE_VMULT<"vmulh", MVE_v8u16, 0b0>;
+defm MVE_VMULHu32 : MVE_VMULT<"vmulh", MVE_v4u32, 0b0>;
-def MVE_VRMULHs8 : MVE_VxMULH<"vrmulh", "s8", 0b0, 0b00, 0b1>;
-def MVE_VRMULHs16 : MVE_VxMULH<"vrmulh", "s16", 0b0, 0b01, 0b1>;
-def MVE_VRMULHs32 : MVE_VxMULH<"vrmulh", "s32", 0b0, 0b10, 0b1>;
-def MVE_VRMULHu8 : MVE_VxMULH<"vrmulh", "u8", 0b1, 0b00, 0b1>;
-def MVE_VRMULHu16 : MVE_VxMULH<"vrmulh", "u16", 0b1, 0b01, 0b1>;
-def MVE_VRMULHu32 : MVE_VxMULH<"vrmulh", "u32", 0b1, 0b10, 0b1>;
+defm MVE_VRMULHs8 : MVE_VMULT<"vrmulh", MVE_v16s8, 0b1>;
+defm MVE_VRMULHs16 : MVE_VMULT<"vrmulh", MVE_v8s16, 0b1>;
+defm MVE_VRMULHs32 : MVE_VMULT<"vrmulh", MVE_v4s32, 0b1>;
+defm MVE_VRMULHu8 : MVE_VMULT<"vrmulh", MVE_v16u8, 0b1>;
+defm MVE_VRMULHu16 : MVE_VMULT<"vrmulh", MVE_v8u16, 0b1>;
+defm MVE_VRMULHu32 : MVE_VMULT<"vrmulh", MVE_v4u32, 0b1>;
class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
bits<2> size, bit T, list<dag> pattern=[]>
@@ -3551,19 +4237,36 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
let Predicates = [HasMVEFloat];
}
-multiclass MVE_VCVT_ff_halves<string suffix, bit op> {
- def bh : MVE_VCVT_ff<"vcvtb", suffix, op, 0b0>;
- def th : MVE_VCVT_ff<"vcvtt", suffix, op, 0b1>;
+multiclass MVE_VCVT_f2h_m<string iname, int half> {
+ def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(v8f16 (int_arm_mve_vcvt_narrow
+ (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
+ (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
+ def : Pat<(v8f16 (int_arm_mve_vcvt_narrow_predicated
+ (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half),
+ (v4i1 VCCR:$mask))),
+ (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
+ ARMVCCThen, (v4i1 VCCR:$mask)))>;
+ }
}
-defm MVE_VCVTf16f32 : MVE_VCVT_ff_halves<"f16.f32", 0b0>;
-defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>;
+multiclass MVE_VCVT_h2f_m<string iname, int half> {
+ def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half>;
+}
+
+defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
+defm MVE_VCVTf16f32th : MVE_VCVT_f2h_m<"vcvtt", 0b1>;
+defm MVE_VCVTf32f16bh : MVE_VCVT_h2f_m<"vcvtb", 0b0>;
+defm MVE_VCVTf32f16th : MVE_VCVT_h2f_m<"vcvtt", 0b1>;
class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
- string cstr="", list<dag> pattern=[]>
+ string cstr="">
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
bits<4> Qn;
bit rot;
@@ -3577,13 +4280,35 @@ class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
let Inst{0} = 0b0;
}
-def MVE_VCADDi8 : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>;
-def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>;
-def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VxCADD_m<string iname, MVEVectorVTInfo VTI,
+ bit halve, string cstr=""> {
+ def "" : MVE_VxCADD<iname, VTI.Suffix, VTI.Size, halve, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (int_arm_mve_vcaddq halve,
+ imm:$rot, (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vcaddq_predicated halve,
+ imm:$rot, (VTI.Vec MQPR:$inactive),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+
+ }
+}
+
+defm MVE_VCADDi8 : MVE_VxCADD_m<"vcadd", MVE_v16i8, 0b1>;
+defm MVE_VCADDi16 : MVE_VxCADD_m<"vcadd", MVE_v8i16, 0b1>;
+defm MVE_VCADDi32 : MVE_VxCADD_m<"vcadd", MVE_v4i32, 0b1, "@earlyclobber $Qd">;
-def MVE_VHCADDs8 : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>;
-def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>;
-def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0, "@earlyclobber $Qd">;
+defm MVE_VHCADDs8 : MVE_VxCADD_m<"vhcadd", MVE_v16s8, 0b0>;
+defm MVE_VHCADDs16 : MVE_VxCADD_m<"vhcadd", MVE_v8s16, 0b0>;
+defm MVE_VHCADDs32 : MVE_VxCADD_m<"vhcadd", MVE_v4s32, 0b0, "@earlyclobber $Qd">;
class MVE_VADCSBC<string iname, bit I, bit subtract,
dag carryin, list<dag> pattern=[]>
@@ -3627,6 +4352,7 @@ class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
let Inst{8} = 0b1;
let Inst{7} = Qn{3};
let Inst{0} = 0b1;
+ let validForTailPredication = 1;
}
multiclass MVE_VQDMULL_halves<string suffix, bit size, string cstr=""> {
@@ -3742,6 +4468,7 @@ class MVE_VQDMULL_qr<string iname, string suffix, bit size,
let Inst{12} = T;
let Inst{8} = 0b1;
let Inst{5} = 0b1;
+ let validForTailPredication = 1;
}
multiclass MVE_VQDMULL_qr_halves<string suffix, bit size, string cstr=""> {
@@ -3804,13 +4531,30 @@ class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
let validForTailPredication = 1;
}
+multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
+ def "" : MVE_VxSHL_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, q, r>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar
+ (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh)))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vshl_scalar_predicated
+ (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+ (i32 q), (i32 r), (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+}
+
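+// Approximate expansion: for MVE_v16s8 with q = r = 0,
+//   def : Pat<(v16i8 (int_arm_mve_vshl_scalar (v16i8 MQPR:$in),
+//                      (i32 rGPR:$sh), (i32 0), (i32 0), (i32 0))),
+//             (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$in), (i32 rGPR:$sh)))>;
+// Unlike the vector-shift patterns, the predicated form here carries no
+// $inactive operand.
+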
multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
- def s8 : MVE_VxSHL_qr<iname, "s8", 0b0, 0b00, bit_7, bit_17>;
- def s16 : MVE_VxSHL_qr<iname, "s16", 0b0, 0b01, bit_7, bit_17>;
- def s32 : MVE_VxSHL_qr<iname, "s32", 0b0, 0b10, bit_7, bit_17>;
- def u8 : MVE_VxSHL_qr<iname, "u8", 0b1, 0b00, bit_7, bit_17>;
- def u16 : MVE_VxSHL_qr<iname, "u16", 0b1, 0b01, bit_7, bit_17>;
- def u32 : MVE_VxSHL_qr<iname, "u32", 0b1, 0b10, bit_7, bit_17>;
+ defm s8 : MVE_VxSHL_qr_p<iname, MVE_v16s8, bit_7, bit_17>;
+ defm s16 : MVE_VxSHL_qr_p<iname, MVE_v8s16, bit_7, bit_17>;
+ defm s32 : MVE_VxSHL_qr_p<iname, MVE_v4s32, bit_7, bit_17>;
+ defm u8 : MVE_VxSHL_qr_p<iname, MVE_v16u8, bit_7, bit_17>;
+ defm u16 : MVE_VxSHL_qr_p<iname, MVE_v8u16, bit_7, bit_17>;
+ defm u32 : MVE_VxSHL_qr_p<iname, MVE_v4u32, bit_7, bit_17>;
}
defm MVE_VSHL_qr : MVE_VxSHL_qr_types<"vshl", 0b0, 0b0>;
@@ -4054,7 +4798,7 @@ def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>;
def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
let hasSideEffects = 1 in
-class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]>
+class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
"$Rn", vpred_n, "", pattern> {
bits<4> Rn;
@@ -4072,20 +4816,22 @@ class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VCTP8 : MVE_VCTP<"8", 0b00>;
-def MVE_VCTP16 : MVE_VCTP<"16", 0b01>;
-def MVE_VCTP32 : MVE_VCTP<"32", 0b10>;
-def MVE_VCTP64 : MVE_VCTP<"64", 0b11>;
+multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> {
+ def "": MVE_VCTPInst<VTI.BitsSuffix, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
-let Predicates = [HasMVEInt] in {
- def : Pat<(int_arm_vctp8 rGPR:$Rn),
- (v16i1 (MVE_VCTP8 rGPR:$Rn))>;
- def : Pat<(int_arm_vctp16 rGPR:$Rn),
- (v8i1 (MVE_VCTP16 rGPR:$Rn))>;
- def : Pat<(int_arm_vctp32 rGPR:$Rn),
- (v4i1 (MVE_VCTP32 rGPR:$Rn))>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(intr rGPR:$Rn), (VTI.Pred (Inst rGPR:$Rn))>;
+ def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)),
+ (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>;
+ }
}
+defm MVE_VCTP8 : MVE_VCTP<MVE_v16i8, int_arm_mve_vctp8>;
+defm MVE_VCTP16 : MVE_VCTP<MVE_v8i16, int_arm_mve_vctp16>;
+defm MVE_VCTP32 : MVE_VCTP<MVE_v4i32, int_arm_mve_vctp32>;
+defm MVE_VCTP64 : MVE_VCTP<MVE_v2i64, int_arm_mve_vctp64>;
+
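+// Hand-expanded sketch of the 8-bit case (approximate):
+//   def : Pat<(int_arm_mve_vctp8 rGPR:$Rn),
+//             (v16i1 (MVE_VCTP8 rGPR:$Rn))>;
+//   def : Pat<(and (int_arm_mve_vctp8 rGPR:$Rn), (v16i1 VCCR:$mask)),
+//             (v16i1 (MVE_VCTP8 rGPR:$Rn, ARMVCCThen, VCCR:$mask))>;
+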
// end of mve_qDest_rSrc
// start of coproc mov
@@ -4258,6 +5004,29 @@ foreach wb = [MVE_vldst24_writeback<
"vst" # n.nvecs # stage # "." # s.lanesize>;
}
+multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
+ foreach stage = [0,1] in
+ def : Pat<(int_arm_mve_vst2q i32:$addr,
+ (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
+ (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize)
+ (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ t2_addr_offset_none:$addr)>;
+
+ foreach stage = [0,1,2,3] in
+ def : Pat<(int_arm_mve_vst4q i32:$addr,
+ (VT MQPR:$v0), (VT MQPR:$v1),
+ (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
+ (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize)
+ (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
+ t2_addr_offset_none:$addr)>;
+}
+defm : MVE_vst24_patterns<8, v16i8>;
+defm : MVE_vst24_patterns<16, v8i16>;
+defm : MVE_vst24_patterns<32, v4i32>;
+defm : MVE_vst24_patterns<16, v8f16>;
+defm : MVE_vst24_patterns<32, v4f32>;
+
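+// One approximate instance from the first foreach (stage 0, 8-bit lanes):
+//   def : Pat<(int_arm_mve_vst2q i32:$addr, (v16i8 MQPR:$v0),
+//                                (v16i8 MQPR:$v1), (i32 0)),
+//             (MVE_VST20_8 (REG_SEQUENCE QQPR, v16i8:$v0, qsub_0,
+//                                        v16i8:$v1, qsub_1),
+//                          t2_addr_offset_none:$addr)>;
+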
// end of MVE interleaving load/store
// start of MVE predicable load/store
@@ -4513,28 +5282,90 @@ class MVE_VLDRSTR_rq_b<MVE_ldst_direction dir, MVE_memsz memsz,
string asm, string suffix, bit U, bits<2> size>
: MVE_VLDRSTR_rq<dir, memsz, U, size, 0, asm, suffix, 0>;
+// Multiclasses wrapping the classes above to add ISel patterns for the
+// gather/scatter intrinsics.
+multiclass MVE_VLDR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
+ defm "": MVE_VLDRSTR_rq_w<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
+ VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar InstU = !cast<Instruction>(NAME # "_u");
+
+ foreach VTI = VTIs in
+ foreach UnsignedFlag = !if(!eq(VTI.Size, memsz.encoding),
+ [0,1], [VTI.Unsigned]) in {
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag)),
+ (VTI.Vec (InstU GPR:$base, MQPR:$offsets))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)),
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (InstU GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ }
+}
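The inner foreach over UnsignedFlag is really a conditional: when the memory size equals the element size no extension takes place, so the signed and unsigned forms of the gather intrinsic are interchangeable and both select the same instruction. Sketching the MVE_VLDRHU16_rq instantiation further down (MVE_memH with VTI = MVE_v8u16, assuming MVE_memH has encoding 0b01, TypeBits 16 and shift 1), the two unshifted unpredicated patterns generated are:

    def : Pat<(v8i16 (int_arm_mve_vldr_gather_offset GPR:$base, (v8i16 MQPR:$offsets), 16, 0, 0)),
              (v8i16 (MVE_VLDRHU16_rq_u GPR:$base, MQPR:$offsets))>;
    def : Pat<(v8i16 (int_arm_mve_vldr_gather_offset GPR:$base, (v8i16 MQPR:$offsets), 16, 0, 1)),
              (v8i16 (MVE_VLDRHU16_rq_u GPR:$base, MQPR:$offsets))>;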
+multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
+ def "": MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb",
+ VTIs[0].Suffix, VTIs[0].Unsigned, VTIs[0].Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ foreach VTI = VTIs in {
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)),
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ }
+}
+multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
+ defm "": MVE_VLDRSTR_rq_w<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
+ VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar InstU = !cast<Instruction>(NAME # "_u");
+
+ foreach VTI = VTIs in {
+ def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0),
+ (InstU MQPR:$data, GPR:$base, MQPR:$offsets)>;
+ def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift),
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
+ def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)),
+ (InstU MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)),
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ }
+}
+multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
+ def "": MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb",
+ VTIs[0].BitsSuffix, 0, VTIs[0].Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ foreach VTI = VTIs in {
+ def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0),
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
+ def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)),
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ }
+}
+
// Actually define all the loads and stores in this family.
-def MVE_VLDRBU8_rq : MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u8", 1,0b00>;
-def MVE_VLDRBU16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u16", 1,0b01>;
-def MVE_VLDRBS16_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s16", 0,0b01>;
-def MVE_VLDRBU32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","u32", 1,0b10>;
-def MVE_VLDRBS32_rq: MVE_VLDRSTR_rq_b<MVE_ld, MVE_memB, "vldrb","s32", 0,0b10>;
+defm MVE_VLDRBU8_rq : MVE_VLDR_rq_b<[MVE_v16u8,MVE_v16s8]>;
+defm MVE_VLDRBU16_rq: MVE_VLDR_rq_b<[MVE_v8u16]>;
+defm MVE_VLDRBS16_rq: MVE_VLDR_rq_b<[MVE_v8s16]>;
+defm MVE_VLDRBU32_rq: MVE_VLDR_rq_b<[MVE_v4u32]>;
+defm MVE_VLDRBS32_rq: MVE_VLDR_rq_b<[MVE_v4s32]>;
-defm MVE_VLDRHU16_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u16", 1,0b01>;
-defm MVE_VLDRHU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","u32", 1,0b10>;
-defm MVE_VLDRHS32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memH, "vldrh","s32", 0,0b10>;
-defm MVE_VLDRWU32_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memW, "vldrw","u32", 1,0b10>;
-defm MVE_VLDRDU64_rq: MVE_VLDRSTR_rq_w<MVE_ld, MVE_memD, "vldrd","u64", 1,0b11>;
+defm MVE_VLDRHU16_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v8u16,MVE_v8s16,MVE_v8f16]>;
+defm MVE_VLDRHU32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4u32]>;
+defm MVE_VLDRHS32_rq: MVE_VLDR_rq_w<MVE_memH, [MVE_v4s32]>;
+defm MVE_VLDRWU32_rq: MVE_VLDR_rq_w<MVE_memW, [MVE_v4u32,MVE_v4s32,MVE_v4f32]>;
+defm MVE_VLDRDU64_rq: MVE_VLDR_rq_w<MVE_memD, [MVE_v2u64,MVE_v2s64]>;
-def MVE_VSTRB8_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","8", 0,0b00>;
-def MVE_VSTRB16_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","16", 0,0b01>;
-def MVE_VSTRB32_rq : MVE_VLDRSTR_rq_b<MVE_st, MVE_memB, "vstrb","32", 0,0b10>;
+defm MVE_VSTRB8_rq : MVE_VSTR_rq_b<[MVE_v16i8]>;
+defm MVE_VSTRB16_rq : MVE_VSTR_rq_b<[MVE_v8i16]>;
+defm MVE_VSTRB32_rq : MVE_VSTR_rq_b<[MVE_v4i32]>;
-defm MVE_VSTRH16_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","16", 0,0b01>;
-defm MVE_VSTRH32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memH, "vstrh","32", 0,0b10>;
-defm MVE_VSTRW32_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memW, "vstrw","32", 0,0b10>;
-defm MVE_VSTRD64_rq : MVE_VLDRSTR_rq_w<MVE_st, MVE_memD, "vstrd","64", 0,0b11>;
+defm MVE_VSTRH16_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v8i16,MVE_v8f16]>;
+defm MVE_VSTRH32_rq : MVE_VSTR_rq_w<MVE_memH, [MVE_v4i32]>;
+defm MVE_VSTRW32_rq : MVE_VSTR_rq_w<MVE_memW, [MVE_v4i32,MVE_v4f32]>;
+defm MVE_VSTRD64_rq : MVE_VSTR_rq_w<MVE_memD, [MVE_v2i64]>;
// Gather loads / scatter stores whose address operand is of the form
// [Qm,#imm], i.e. a vector containing a full base address for each
@@ -4573,11 +5404,58 @@ multiclass MVE_VLDRSTR_qi_m<MVE_ldst_direction dir, MVE_memsz memsz,
}
}
+// Multiclasses wrapping that one, adding selection patterns for the
+// non-writeback loads and all the stores. (The writeback loads must
+// deliver multiple output values, so they have to be selected by C++
+// code.)
+multiclass MVE_VLDR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
+ list<MVEVectorVTInfo> DVTIs> {
+ defm "" : MVE_VLDRSTR_qi_m<MVE_ld, memsz, "vldr" # memsz.MnemonicLetter,
+ "u" # memsz.TypeBits>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ foreach DVTI = DVTIs in {
+ def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset))),
+ (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset)))>;
+ def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base_predicated
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (AVTI.Pred VCCR:$pred))),
+ (DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset),
+ ARMVCCThen, VCCR:$pred))>;
+ }
+}
+multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
+ list<MVEVectorVTInfo> DVTIs> {
+ defm "" : MVE_VLDRSTR_qi_m<MVE_st, memsz, "vstr" # memsz.MnemonicLetter,
+ !cast<string>(memsz.TypeBits)>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar InstPre = !cast<Instruction>(NAME # "_pre");
+
+ foreach DVTI = DVTIs in {
+ def : Pat<(int_arm_mve_vstr_scatter_base
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data)),
+ (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
+ (i32 imm:$offset))>;
+ def : Pat<(int_arm_mve_vstr_scatter_base_predicated
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred)),
+ (Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
+ (i32 imm:$offset), ARMVCCThen, VCCR:$pred)>;
+ def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))),
+ (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
+ (i32 imm:$offset)))>;
+ def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb_predicated
+ (AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred))),
+ (AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
+ (i32 imm:$offset), ARMVCCThen, VCCR:$pred))>;
+ }
+}
+
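The asymmetry is worth spelling out: a writeback gather load yields two results (the loaded vector and the updated base vector), which a SelectionDAG pattern cannot express, whereas a writeback scatter store yields only the updated base, so it can be matched directly. As a sketch by substitution, the MVE_memW instantiation below produces, among others:

    def : Pat<(v4i32 (int_arm_mve_vstr_scatter_base_wb
                  (v4i32 MQPR:$addr), (i32 imm:$offset), (v4i32 MQPR:$data))),
              (v4i32 (MVE_VSTRW32_qi_pre (v4i32 MQPR:$data), (v4i32 MQPR:$addr),
                                         (i32 imm:$offset)))>;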
// Actual instruction definitions.
-defm MVE_VLDRWU32_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memW, "vldrw", "u32">;
-defm MVE_VLDRDU64_qi: MVE_VLDRSTR_qi_m<MVE_ld, MVE_memD, "vldrd", "u64">;
-defm MVE_VSTRW32_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memW, "vstrw", "32">;
-defm MVE_VSTRD64_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memD, "vstrd", "64">;
+defm MVE_VLDRWU32_qi: MVE_VLDR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
+defm MVE_VLDRDU64_qi: MVE_VLDR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
+defm MVE_VSTRW32_qi: MVE_VSTR_qi<MVE_memW, MVE_v4i32, [MVE_v4i32,MVE_v4f32]>;
+defm MVE_VSTRD64_qi: MVE_VSTR_qi<MVE_memD, MVE_v2i64, [MVE_v2i64,MVE_v2f64]>;
// Define aliases for all the instructions where memory size and
// vector lane size are the same. These are mnemonic aliases, so they
@@ -4595,21 +5473,21 @@ defm MVE_VSTRD64_qi: MVE_VLDRSTR_qi_m<MVE_st, MVE_memD, "vstrd", "64">;
foreach vpt_cond = ["", "t", "e"] in
foreach memsz = [MVE_memB, MVE_memH, MVE_memW, MVE_memD] in
foreach suffix = memsz.suffixes in {
+ // Define an alias with every suffix in the list, except for the one
+ // used by the real Instruction record (i.e. the one that all the
+ // rest are aliases *for*).
+
+ if !ne(suffix, memsz.CanonLoadSuffix) then {
+ def : MnemonicAlias<
+ "vldr" # memsz.MnemonicLetter # vpt_cond # suffix,
+ "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>;
+ }
- // These foreaches are conceptually ifs, implemented by iterating a
- // dummy variable over a list with 0 or 1 elements depending on the
- // condition. The idea is to iterate over _nearly_ all the suffixes
- // in memsz.suffixes, but omit the one we want all the others to alias.
-
- foreach _ = !if(!ne(suffix, memsz.CanonLoadSuffix), [1], []<int>) in
- def : MnemonicAlias<
- "vldr" # memsz.MnemonicLetter # vpt_cond # suffix,
- "vldr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonLoadSuffix>;
-
- foreach _ = !if(!ne(suffix, memsz.CanonStoreSuffix), [1], []<int>) in
- def : MnemonicAlias<
- "vstr" # memsz.MnemonicLetter # vpt_cond # suffix,
- "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>;
+ if !ne(suffix, memsz.CanonStoreSuffix) then {
+ def : MnemonicAlias<
+ "vstr" # memsz.MnemonicLetter # vpt_cond # suffix,
+ "vstr" # memsz.MnemonicLetter # vpt_cond # memsz.CanonStoreSuffix>;
+ }
}
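The rewrite swaps a TableGen workaround for the language's `if ... then` statement; the two spellings are equivalent, as this side-by-side sketch of the load-alias case from the hunk shows:

    // Old: iterate a dummy variable over a list of 0 or 1 elements.
    foreach _ = !if(!ne(suffix, memsz.CanonLoadSuffix), [1], []<int>) in
      def : MnemonicAlias<...>;

    // New: a genuine conditional statement.
    if !ne(suffix, memsz.CanonLoadSuffix) then
      def : MnemonicAlias<...>;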
// end of MVE predicable load/store
@@ -4632,7 +5510,6 @@ class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> patte
let Inst{4} = 0b0;
let Defs = [VPR];
- let validForTailPredication = 1;
}
class MVE_VPTt1<string suffix, bits<2> size, dag iops>
@@ -4644,7 +5521,6 @@ class MVE_VPTt1<string suffix, bits<2> size, dag iops>
let Inst{5} = Qm{3};
let Inst{3-1} = Qm{2-0};
let Inst{0} = fc{1};
- let validForTailPredication = 1;
}
class MVE_VPTt1i<string suffix, bits<2> size>
@@ -4746,7 +5622,6 @@ class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=
let Defs = [VPR];
let Predicates = [HasMVEFloat];
- let validForTailPredication = 1;
}
class MVE_VPTft1<string suffix, bit size>
@@ -4816,7 +5691,6 @@ def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
let Inst{4} = 0b0;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b1;
- let validForTailPredication = 1;
}
foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
@@ -4826,87 +5700,87 @@ def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
- (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
- (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
- (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
- (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
- (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
- (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>;
+ (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne)))>;
def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
- (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
+ (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
- (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
+ (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
- (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
+ (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
- (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
- (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;
+ (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
// Pred <-> Int
def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))),
- (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, 1))>;
+ (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))),
- (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, 1))>;
+ (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))),
- (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>;
+ (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, ARMCCne))>;
}
let Predicates = [HasMVEFloat] in {
// Pred <-> Float
// 112 is 1.0 in float
def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
  // 2620 is 1.0 in half
def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
// 240 is -1.0 in float
def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
// 2748 is -1.0 in half
def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>;
}
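To unpack the magic immediates (assuming the standard Arm modified-immediate encodings): 112 = 0x70 expands under the 8-bit VFP float encoding to sign 0, biased exponent 127, fraction 0, i.e. 1.0f, and 240 = 0xF0 is the same value with the sign bit set, i.e. -1.0f. The 16-bit cases pair a cmode with an imm8: 2620 = 0xA3C splats imm8 0x3C into the high byte of each lane, giving the half-precision bit pattern 0x3C00 = 1.0, and 2748 = 0xABC likewise gives 0xBC00 = -1.0.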
def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
@@ -4955,6 +5829,8 @@ class MVE_WLSTP<string asm, bits<2> size>
let Inst{13} = 0b0;
let Inst{11} = label{0};
let Inst{10-1} = label{10-1};
+ let isBranch = 1;
+ let isTerminator = 1;
}
def MVE_DLSTP_8 : MVE_DLSTP<"dlstp.8", 0b00>;
@@ -4983,6 +5859,8 @@ def MVE_LETP : MVE_loltp_end<(outs GPRlr:$LRout),
let Inst{13} = 0b0;
let Inst{11} = label{0};
let Inst{10-1} = label{10-1};
+ let isBranch = 1;
+ let isTerminator = 1;
}
def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
@@ -4998,61 +5876,7 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
// Patterns
//===----------------------------------------------------------------------===//
-class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
- (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
-class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
- (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred)>;
-
-multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
- int shift> {
- def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
- def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
-}
-
-class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag LoadKind, int shift>
- : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
- (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
-class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
- PatFrag LoadKind, int shift>
- : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
- (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred))>;
-
-multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
- int shift> {
- def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
- def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
-}
-
-class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
- PatFrag StoreKind, int shift>
- : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
- (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
-
-multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
- int shift> {
- def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>;
- def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>;
-}
+// PatFrags for loads and stores, trying to keep the names semi-consistent.
def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(pre_store node:$val, node:$ptr, node:$offset), [{
@@ -5072,77 +5896,249 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
}]>;
-def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+def aligned_maskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
return Ld->getMemoryVT().getScalarType() == MVT::i8;
}]>;
-def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_sextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
}]>;
-def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_zextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
}]>;
-def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (maskedload8 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_extmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
}]>;
-def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+def aligned_maskedloadvi16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2;
}]>;
-def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_sextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
}]>;
-def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_zextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
}]>;
-def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{
+def aligned_extmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD;
}]>;
-def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
- (masked_ld node:$ptr, node:$pred, node:$passthru), [{
+def aligned_maskedloadvi32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru),
+ (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{
auto *Ld = cast<MaskedLoadSDNode>(N);
EVT ScalarVT = Ld->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4;
}]>;
-def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
+def aligned_maskedstvi8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
-def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (maskedstore8 node:$val, node:$ptr, node:$pred), [{
- return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+def aligned_maskedstvi16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+def aligned_maskedstvi32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
+ (masked_st node:$val, node:$ptr, undef, node:$pred), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
}]>;
-def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
+
+def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
+ (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
+}]>;
+def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask),
+ (masked_st node:$val, node:$base, node:$offset, node:$mask), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return AM == ISD::POST_INC || AM == ISD::POST_DEC;
+}]>;
+def aligned_pre_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_post_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_pre_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
auto *St = cast<MaskedStoreSDNode>(N);
EVT ScalarVT = St->getMemoryVT().getScalarType();
return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
}]>;
+def aligned_post_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+def aligned_pre_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
+}]>;
+def aligned_post_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask),
+ (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
+}]>;
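These indexed PatFrags lean on the extra offset operand of masked_st (left undef in the unindexed frags above) together with the node's addressing mode. A pattern built from them has the shape below, sketched by substituting aligned_pre_maskedstorevi8 and MVE_VSTRBU8_pre into the MVE_vector_offset_maskedstore_typed class instantiated later in this patch:

    def : Pat<(aligned_pre_maskedstorevi8 (v16i8 MQPR:$Rt), tGPR:$Rn,
                  t2am_imm7_offset<0>:$addr, VCCR:$pred),
              (MVE_VSTRBU8_pre MQPR:$Rt, tGPR:$Rn,
                  t2am_imm7_offset<0>:$addr, ARMVCCThen, VCCR:$pred)>;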
+
+
+// PatFrags for aligned extending loads and truncating stores.
-def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (maskedstore16 node:$val, node:$ptr, node:$pred), [{
+def aligned_extloadvi8 : PatFrag<(ops node:$ptr), (extloadvi8 node:$ptr)>;
+def aligned_sextloadvi8 : PatFrag<(ops node:$ptr), (sextloadvi8 node:$ptr)>;
+def aligned_zextloadvi8 : PatFrag<(ops node:$ptr), (zextloadvi8 node:$ptr)>;
+
+def aligned_truncstvi8 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorevi8 node:$val, node:$ptr)>;
+def aligned_post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (post_truncstvi8 node:$val, node:$base, node:$offset)>;
+def aligned_pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncstvi8 node:$val, node:$base, node:$offset)>;
+
+let MinAlignment = 2 in {
+ def aligned_extloadvi16 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>;
+ def aligned_sextloadvi16 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>;
+ def aligned_zextloadvi16 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>;
+
+ def aligned_truncstvi16 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorevi16 node:$val, node:$ptr)>;
+ def aligned_post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (post_truncstvi16 node:$val, node:$base, node:$offset)>;
+ def aligned_pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncstvi16 node:$val, node:$base, node:$offset)>;
+}
+
+def truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$pred),
+ (masked_st node:$val, node:$base, undef, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
}]>;
-def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred),
- (masked_st node:$val, node:$ptr, node:$pred), [{
+def aligned_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$pred),
+ (truncmaskedst node:$val, node:$base, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$pred),
+ (truncmaskedst node:$val, node:$base, node:$pred), [{
auto *St = cast<MaskedStoreSDNode>(N);
EVT ScalarVT = St->getMemoryVT().getScalarType();
- return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4;
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
}]>;
+def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
+ (masked_st node:$val, node:$base, node:$offset, node:$pred), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
+}]>;
+def aligned_pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
+ (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_pre_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred),
+ (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
+ (masked_st node:$val, node:$base, node:$offset, node:$postd), [{
+ ISD::MemIndexedMode AM = cast<MaskedStoreSDNode>(N)->getAddressingMode();
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore() && (AM == ISD::POST_INC || AM == ISD::POST_DEC);
+}]>;
+def aligned_post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
+ (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def aligned_post_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd),
+ (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{
+ auto *St = cast<MaskedStoreSDNode>(N);
+ EVT ScalarVT = St->getMemoryVT().getScalarType();
+ return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2;
+}]>;
+
+// Load/store patterns
+
+class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr),
+ (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>;
+
+class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
+ (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+
+multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
+ int shift> {
+ def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>;
+}
+
+class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag LoadKind, int shift>
+ : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)),
+ (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>;
+
+class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
+ PatFrag LoadKind, int shift>
+ : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))),
+ (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+
+multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
+ int shift> {
+ def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>;
+ def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
+}
+
+class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
+ (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
+
+class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode,
+ PatFrag StoreKind, int shift>
+ : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr, VCCR:$pred),
+ (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+
+multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
+ int shift> {
+ def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>;
+ def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>;
+}
+
let Predicates = [HasMVEInt, IsLE] in {
// Stores
@@ -5220,116 +6216,73 @@ let Predicates = [HasMVEInt, IsBE] in {
let Predicates = [HasMVEInt] in {
// Aligned masked store, shared between LE and BE
- def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, maskedstore8, 0>;
- def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, maskedstore16, 1>;
- def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, maskedstore16, 1>;
- def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, maskedstore32, 2>;
- def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, maskedstore32, 2>;
- // Truncating stores
- def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
- (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
- def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred),
- (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>;
- def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred),
- (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>;
+ def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, aligned_maskedstvi8, 0>;
+ def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
+ def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, aligned_maskedstvi16, 1>;
+ def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;
+ def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, aligned_maskedstvi32, 2>;
+
+ // Pre/Post inc masked stores
+ def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_pre, aligned_pre_maskedstorevi8, 0>;
+ def : MVE_vector_offset_maskedstore_typed<v16i8, MVE_VSTRBU8_post, aligned_post_maskedstorevi8, 0>;
+ def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v8i16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_pre, aligned_pre_maskedstorevi16, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v8f16, MVE_VSTRHU16_post, aligned_post_maskedstorevi16, 1>;
+ def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_pre, aligned_pre_maskedstorevi32, 2>;
+ def : MVE_vector_offset_maskedstore_typed<v4i32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;
+ def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_pre, aligned_pre_maskedstorevi32, 2>;
+ def : MVE_vector_offset_maskedstore_typed<v4f32, MVE_VSTRWU32_post, aligned_post_maskedstorevi32, 2>;
+
// Aligned masked loads
- def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload8, 0>;
- def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>;
- def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>;
- def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>;
- def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>;
- // Extending masked loads.
- def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v8i16 NEONimmAllZerosV))),
- (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v8i16 NEONimmAllZerosV))),
- (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v8i16 NEONimmAllZerosV))),
- (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
- def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred,
- (v4i32 NEONimmAllZerosV))),
- (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>;
+ def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, aligned_maskedloadvi8, 0>;
+ def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
+ def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, aligned_maskedloadvi16, 1>;
+ def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
+ def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, aligned_maskedloadvi32, 2>;
}
// Widening/Narrowing Loads/Stores
-let MinAlignment = 2 in {
- def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr),
- (truncstorevi16 node:$val, node:$ptr)>;
- def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset),
- (post_truncstvi16 node:$val, node:$base, node:$offset)>;
- def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset),
- (pre_truncstvi16 node:$val, node:$base, node:$offset)>;
-}
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr),
- (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>;
- def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr),
- (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>;
- def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr),
- (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>;
-
- def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
- (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
- def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
- (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
- def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr),
- (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>;
-
- def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
- (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
- def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
- (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
- def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr),
- (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>;
-}
-
-
-let MinAlignment = 2 in {
- def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>;
- def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>;
- def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>;
-}
-
-multiclass MVEExtLoad<string DestLanes, string DestElemBits,
- string SrcElemBits, string SrcElemType,
- string Align, Operand am> {
- def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("extloadvi" # SrcElemBits # Align) am:$addr)),
- (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
- am:$addr)>;
- def _Z : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("zextloadvi" # SrcElemBits # Align) am:$addr)),
- (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits)
- am:$addr)>;
- def _S : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits)
- (!cast<PatFrag>("sextloadvi" # SrcElemBits # Align) am:$addr)),
- (!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits)
- am:$addr)>;
+multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string StoreInst,
+ string Amble, ValueType VT, int Shift> {
+ // Trunc stores
+ def : Pat<(!cast<PatFrag>("aligned_truncst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr),
+ (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr)>;
+ def : Pat<(!cast<PatFrag>("aligned_post_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
+ (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;
+ def : Pat<(!cast<PatFrag>("aligned_pre_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr),
+ (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr)>;
+
+ // Masked trunc stores
+ def : Pat<(!cast<PatFrag>("aligned_truncmaskedst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr, VCCR:$pred),
+ (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ def : Pat<(!cast<PatFrag>("aligned_post_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
+ (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ def : Pat<(!cast<PatFrag>("aligned_pre_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
+ (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+
+ // Ext loads
+ def : Pat<(VT (!cast<PatFrag>("aligned_extload"#Amble) taddrmode_imm7<Shift>:$addr)),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_sextload"#Amble) taddrmode_imm7<Shift>:$addr)),
+ (VT (LoadSInst taddrmode_imm7<Shift>:$addr))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_zextload"#Amble) taddrmode_imm7<Shift>:$addr)),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr))>;
+
+ // Masked ext loads
+ def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT NEONimmAllZerosV))),
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
}
let Predicates = [HasMVEInt] in {
- defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>;
- defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>;
- defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>;
+ defm : MVEExtLoadStore<MVE_VLDRBS16, MVE_VLDRBU16, "MVE_VSTRB16", "vi8", v8i16, 0>;
+ defm : MVEExtLoadStore<MVE_VLDRBS32, MVE_VLDRBU32, "MVE_VSTRB32", "vi8", v4i32, 0>;
+ defm : MVEExtLoadStore<MVE_VLDRHS32, MVE_VLDRHU32, "MVE_VSTRH32", "vi16", v4i32, 1>;
}
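By way of example, the first instantiation expands to twelve patterns; two representative ones (a sign-extending load and the matching truncating store, sketched by substitution) are:

    def : Pat<(v8i16 (aligned_sextloadvi8 taddrmode_imm7<0>:$addr)),
              (v8i16 (MVE_VLDRBS16 taddrmode_imm7<0>:$addr))>;
    def : Pat<(aligned_truncstvi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr),
              (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>;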