summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/ARM/ARMInstrMVE.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/ARM/ARMInstrMVE.td')
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrMVE.td 2090
1 files changed, 1462 insertions, 628 deletions
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 604291be822c4..2a1f50d97e3b3 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -10,44 +10,6 @@
//
//===----------------------------------------------------------------------===//
-class ExpandImmAsmOp<string shift> : AsmOperandClass {
- let Name = !strconcat("ExpandImm", shift);
- let PredicateMethod = !strconcat("isExpImm<", shift, ">");
- let RenderMethod = "addImmOperands";
-}
-class InvertedExpandImmAsmOp<string shift, string size> : AsmOperandClass {
- let Name = !strconcat("InvertedExpandImm", shift, "_", size);
- let PredicateMethod = !strconcat("isInvertedExpImm<", shift, ",", size, ">");
- let RenderMethod = "addImmOperands";
-}
-
-class ExpandImm<string shift> : Operand<i32> {
- let ParserMatchClass = ExpandImmAsmOp<shift>;
- let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",false>");
- let DecoderMethod = !strconcat("DecodeExpandedImmOperand<",shift,">");
- let PrintMethod = "printExpandedImmOperand";
-}
-class InvertedExpandImm<string shift, string size> : Operand<i32> {
- let ParserMatchClass = InvertedExpandImmAsmOp<shift, size>;
- let EncoderMethod = !strconcat("getExpandedImmOpValue<",shift,",true>");
- let PrintMethod = "printExpandedImmOperand";
- // No decoder method needed, because this operand type is only used
- // by aliases (VAND and VORN)
-}
-
-def expzero00 : ExpandImm<"0">;
-def expzero08 : ExpandImm<"8">;
-def expzero16 : ExpandImm<"16">;
-def expzero24 : ExpandImm<"24">;
-
-def expzero00inv16 : InvertedExpandImm<"0", "16">;
-def expzero08inv16 : InvertedExpandImm<"8", "16">;
-
-def expzero00inv32 : InvertedExpandImm<"0", "32">;
-def expzero08inv32 : InvertedExpandImm<"8", "32">;
-def expzero16inv32 : InvertedExpandImm<"16", "32">;
-def expzero24inv32 : InvertedExpandImm<"24", "32">;
-
// VPT condition mask
def vpt_mask : Operand<i32> {
let PrintMethod = "printVPTMask";
@@ -277,7 +239,8 @@ class mve_addr_q_shift<int shift> : MemOperand {
// A family of classes wrapping up information about the vector types
// used by MVE.
-class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
+class MVEVectorVTInfo<ValueType vec, ValueType dblvec,
+ ValueType pred, ValueType dblpred,
bits<2> size, string suffixletter, bit unsigned> {
// The LLVM ValueType representing the vector, so we can use it in
// ISel patterns.
@@ -300,6 +263,9 @@ class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
// directly.
ValueType Pred = pred;
+ // Same as Pred but for DblVec rather than Vec.
+ ValueType DblPred = dblpred;
+
// The most common representation of the vector element size in MVE
// instruction encodings: a 2-bit value V representing an (8<<V)-bit
// vector element.
@@ -319,38 +285,38 @@ class MVEVectorVTInfo<ValueType vec, ValueType dblvec, ValueType pred,
!cast<string>(LaneBits));
// The suffix used on an instruction that mentions the whole type.
- string Suffix = suffixletter ## BitsSuffix;
+ string Suffix = suffixletter # BitsSuffix;
// The letter part of the suffix only.
string SuffixLetter = suffixletter;
}
// Integer vector types that don't treat signed and unsigned differently.
-def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "i", ?>;
-def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "i", ?>;
-def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "i", ?>;
-def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "i", ?>;
+def MVE_v16i8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "i", ?>; // args: vec, dblvec, pred, dblpred, size, suffixletter, unsigned
+def MVE_v8i16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "i", ?>;
+def MVE_v4i32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "i", ?>; // dblpred is the same type as pred here (v4i1)
+def MVE_v2i64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "i", ?>; // dblvec and dblpred are left unset (?)
// Explicitly signed and unsigned integer vectors. They map to the
// same set of LLVM ValueTypes as above, but are represented
// differently in assembly and instruction encodings.
-def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "s", 0b0>;
-def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "s", 0b0>;
-def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "s", 0b0>;
-def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "s", 0b0>;
-def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b00, "u", 0b1>;
-def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b01, "u", 0b1>;
-def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, 0b10, "u", 0b1>;
-def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, 0b11, "u", 0b1>;
+def MVE_v16s8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "s", 0b0>; // "s" records pass 0b0 as the unsigned bit
+def MVE_v8s16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "s", 0b0>;
+def MVE_v4s32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "s", 0b0>;
+def MVE_v2s64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "s", 0b0>; // dblvec and dblpred are left unset (?)
+def MVE_v16u8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b00, "u", 0b1>; // "u" records pass 0b1 as the unsigned bit
+def MVE_v8u16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b01, "u", 0b1>;
+def MVE_v4u32 : MVEVectorVTInfo<v4i32, v2i64, v4i1, v4i1, 0b10, "u", 0b1>;
+def MVE_v2u64 : MVEVectorVTInfo<v2i64, ?, v4i1, ?, 0b11, "u", 0b1>; // dblvec and dblpred are left unset (?)
// FP vector types.
-def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, 0b01, "f", ?>;
-def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, 0b10, "f", ?>;
-def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, 0b11, "f", ?>;
+def MVE_v8f16 : MVEVectorVTInfo<v8f16, v4f32, v8i1, v4i1, 0b01, "f", ?>; // unsigned bit is unset (?) for FP types
+def MVE_v4f32 : MVEVectorVTInfo<v4f32, v2f64, v4i1, v4i1, 0b10, "f", ?>;
+def MVE_v2f64 : MVEVectorVTInfo<v2f64, ?, v4i1, ?, 0b11, "f", ?>; // dblvec and dblpred are left unset (?)
// Polynomial vector types.
-def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, 0b11, "p", 0b0>;
-def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, 0b11, "p", 0b1>;
+def MVE_v16p8 : MVEVectorVTInfo<v16i8, v8i16, v16i1, v8i1, 0b11, "p", 0b0>; // both polynomial records use size 0b11; only the last bit differs
+def MVE_v8p16 : MVEVectorVTInfo<v8i16, v4i32, v8i1, v4i1, 0b11, "p", 0b1>;
// --------- Start of base classes for the instructions themselves
@@ -473,6 +439,8 @@ class MVE_ScalarShiftDoubleReg<string iname, dag iops, string asm,
let Inst{19-17} = RdaLo{3-1};
let Inst{11-9} = RdaHi{3-1};
+
+ let hasSideEffects = 0;
}
class MVE_ScalarShiftDRegImm<string iname, bits<2> op5_4, bit op16,
@@ -590,6 +558,7 @@ class MVE_VABAV<string suffix, bit U, bits<2> size>
let Inst{5} = Qm{3};
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b1;
+ let horizontalReduction = 1;
}
multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
@@ -639,38 +608,63 @@ class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
let Inst{5} = A;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
+ let horizontalReduction = 1;
+ let validForTailPredication = 1;
}
-multiclass MVE_VADDV_A<string suffix, bit U, bits<2> size,
- list<dag> pattern=[]> {
- def acc : MVE_VADDV<"vaddva", suffix,
+def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>; // matched in the non-Unsigned branch of the MVE_VADDV_A patterns
+def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>; // matched in the Unsigned branch of the MVE_VADDV_A patterns
+
+multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> { // defines NAME#"acc" (vaddva) and NAME#"no_acc" (vaddv) plus their ISel patterns
+ def acc : MVE_VADDV<"vaddva", VTI.Suffix,
(ins tGPREven:$Rda_src, MQPR:$Qm), "$Rda = $Rda_src",
- 0b1, U, size, pattern>;
- def no_acc : MVE_VADDV<"vaddv", suffix,
+ 0b1, VTI.Unsigned, VTI.Size>; // suffix, U and size now all come from VTI
+ def no_acc : MVE_VADDV<"vaddv", VTI.Suffix,
(ins MQPR:$Qm), "",
- 0b0, U, size, pattern>;
-}
+ 0b0, VTI.Unsigned, VTI.Size>;
-defm MVE_VADDVs8 : MVE_VADDV_A<"s8", 0b0, 0b00>;
-defm MVE_VADDVs16 : MVE_VADDV_A<"s16", 0b0, 0b01>;
-defm MVE_VADDVs32 : MVE_VADDV_A<"s32", 0b0, 0b10>;
-defm MVE_VADDVu8 : MVE_VADDV_A<"u8", 0b1, 0b00>;
-defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>;
-defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>;
+ defvar InstA = !cast<Instruction>(NAME # "acc"); // the accumulating instruction defined above
+ defvar InstN = !cast<Instruction>(NAME # "no_acc"); // the non-accumulating instruction defined above
-let Predicates = [HasMVEInt] in {
- def : Pat<(i32 (vecreduce_add (v4i32 MQPR:$src))), (i32 (MVE_VADDVu32no_acc $src))>;
- def : Pat<(i32 (vecreduce_add (v8i16 MQPR:$src))), (i32 (MVE_VADDVu16no_acc $src))>;
- def : Pat<(i32 (vecreduce_add (v16i8 MQPR:$src))), (i32 (MVE_VADDVu8no_acc $src))>;
- def : Pat<(i32 (add (i32 (vecreduce_add (v4i32 MQPR:$src1))), (i32 tGPR:$src2))),
- (i32 (MVE_VADDVu32acc $src2, $src1))>;
- def : Pat<(i32 (add (i32 (vecreduce_add (v8i16 MQPR:$src1))), (i32 tGPR:$src2))),
- (i32 (MVE_VADDVu16acc $src2, $src1))>;
- def : Pat<(i32 (add (i32 (vecreduce_add (v16i8 MQPR:$src1))), (i32 tGPR:$src2))),
- (i32 (MVE_VADDVu8acc $src2, $src1))>;
+ let Predicates = [HasMVEInt] in {
+ if VTI.Unsigned then { // generic vecreduce_add is matched only by the unsigned instantiations
+ def : Pat<(i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
+ (i32 (InstN $vec))>;
+ def : Pat<(i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
+ (i32 (InstN $vec))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec MQPR:$vec))), // reduction followed by a scalar add selects the accumulating form
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec))>;
+ def : Pat<(i32 (add (i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec))>;
+ } else { // signed instantiations match only the ARMVADDVs node
+ def : Pat<(i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
+ (i32 (InstN $vec))>;
+ def : Pat<(i32 (add (i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec))>;
+ }
+ def : Pat<(i32 (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec), // predicated intrinsic, emitted for both signednesses
+ (i32 VTI.Unsigned), // the intrinsic's i32 operand is matched against VTI.Unsigned
+ (VTI.Pred VCCR:$pred))),
+ (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ def : Pat<(i32 (add (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
+ (i32 VTI.Unsigned),
+ (VTI.Pred VCCR:$pred)),
+ (i32 tGPREven:$acc))),
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ }
}
+defm MVE_VADDVs8 : MVE_VADDV_A<MVE_v16s8>; // each defm yields <name>acc and <name>no_acc
+defm MVE_VADDVs16 : MVE_VADDV_A<MVE_v8s16>;
+defm MVE_VADDVs32 : MVE_VADDV_A<MVE_v4s32>;
+defm MVE_VADDVu8 : MVE_VADDV_A<MVE_v16u8>;
+defm MVE_VADDVu16 : MVE_VADDV_A<MVE_v8u16>;
+defm MVE_VADDVu32 : MVE_VADDV_A<MVE_v4u32>;
+
class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, list<dag> pattern=[]>
: MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname,
@@ -689,21 +683,58 @@ class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
let Inst{5} = A;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
-}
-
-multiclass MVE_VADDLV_A<string suffix, bit U, list<dag> pattern=[]> {
- def acc : MVE_VADDLV<"vaddlva", suffix,
+ let horizontalReduction = 1;
+}
+
+def SDTVecReduceL : SDTypeProfile<2, 1, [ // VADDLV: two integer results (indices 0, 1) and one vector operand (index 2)
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
+]>;
+def SDTVecReduceLA : SDTypeProfile<2, 3, [ // VADDLVA: two integer results, two integer accumulator operands (2, 3), one vector operand (4)
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
+ SDTCisVec<4>
+]>;
+def SDTVecReduceLP : SDTypeProfile<2, 2, [ // VADDLVp: two integer results (0, 1); operand 2 is the input vector, operand 3 the predicate vector
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>
+]>;
+def SDTVecReduceLPA : SDTypeProfile<2, 4, [ // VADDLVAp: two integer results, two integer accumulator operands (2, 3), then two vector operands (4, 5)
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
+ SDTCisVec<4>, SDTCisVec<5>
+]>;
+
+multiclass MVE_VADDLV_A<MVEVectorVTInfo VTI> { // defines NAME#"acc" (vaddlva) and NAME#"no_acc" (vaddlv) plus their ISel patterns
+ def acc : MVE_VADDLV<"vaddlva", VTI.Suffix,
(ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, MQPR:$Qm),
"$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src",
- 0b1, U, pattern>;
- def no_acc : MVE_VADDLV<"vaddlv", suffix,
+ 0b1, VTI.Unsigned>;
+ def no_acc : MVE_VADDLV<"vaddlv", VTI.Suffix,
(ins MQPR:$Qm), "",
- 0b0, U, pattern>;
-}
+ 0b0, VTI.Unsigned>;
+
+ defvar InstA = !cast<Instruction>(NAME # "acc");
+ defvar InstN = !cast<Instruction>(NAME # "no_acc");
+ defvar letter = VTI.SuffixLetter; // "s" or "u" for the two instantiations of this multiclass in this file
+ defvar ARMVADDLV = SDNode<"ARMISD::VADDLV" # letter, SDTVecReduceL>; // anonymous SDNodes whose opcode name ends in the suffix letter
+ defvar ARMVADDLVA = SDNode<"ARMISD::VADDLVA" # letter, SDTVecReduceLA>;
+ defvar ARMVADDLVp = SDNode<"ARMISD::VADDLVp" # letter, SDTVecReduceLP>;
+ defvar ARMVADDLVAp = SDNode<"ARMISD::VADDLVAp" # letter, SDTVecReduceLPA>;
-defm MVE_VADDLVs32 : MVE_VADDLV_A<"s32", 0b0>;
-defm MVE_VADDLVu32 : MVE_VADDLV_A<"u32", 0b1>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(ARMVADDLV (v4i32 MQPR:$vec)), // NOTE(review): vector type is hard-coded as v4i32 while the predicate uses VTI.Pred; equivalent for the v4s32/v4u32 instantiations
+ (InstN (v4i32 MQPR:$vec))>;
+ def : Pat<(ARMVADDLVA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec)),
+ (InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec))>;
+ def : Pat<(ARMVADDLVp (v4i32 MQPR:$vec), (VTI.Pred VCCR:$pred)), // predicated forms append ARMVCCThen and the predicate to the instruction operands
+ (InstN (v4i32 MQPR:$vec), ARMVCCThen, (VTI.Pred VCCR:$pred))>;
+ def : Pat<(ARMVADDLVAp tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
+ (VTI.Pred VCCR:$pred)),
+ (InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
+ ARMVCCThen, (VTI.Pred VCCR:$pred))>;
+ }
+}
+
+defm MVE_VADDLVs32 : MVE_VADDLV_A<MVE_v4s32>; // only 32-bit element types are instantiated here
+defm MVE_VADDLVu32 : MVE_VADDLV_A<MVE_v4u32>;
class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
bit bit_17, bit bit_7, list<dag> pattern=[]>
@@ -724,25 +755,48 @@ class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
let Inst{6-5} = 0b00;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
+ let horizontalReduction = 1;
let Predicates = [HasMVEFloat];
+ let hasSideEffects = 0;
}
-multiclass MVE_VMINMAXNMV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
- def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b1, bit_7, pattern>;
- def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b1, bit_7, pattern>;
-}
+multiclass MVE_VMINMAXNMV_p<string iname, bit notAbs, bit isMin, // defines instruction NAME and patterns for its unpredicated and predicated intrinsics
+ MVEVectorVTInfo VTI, string intrBaseName,
+ ValueType Scalar, RegisterClass ScalarReg> {
+ def "": MVE_VMINMAXNMV<iname, VTI.Suffix, VTI.Size{0}, notAbs, isMin>; // sz = low bit of VTI.Size; notAbs/isMin fill the bit_17/bit_7 parameters
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar unpred_intr = !cast<Intrinsic>(intrBaseName); // intrinsics are looked up by name
+ defvar pred_intr = !cast<Intrinsic>(intrBaseName#"_predicated"); // predicated variant = base name plus "_predicated"
-defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 0b1>;
-defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 0b0>;
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(Scalar (unpred_intr (Scalar ScalarReg:$prev),
+ (VTI.Vec MQPR:$vec))),
+ (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR), // $prev is copied into rGPR for the instruction...
+ (VTI.Vec MQPR:$vec)),
+ ScalarReg)>; // ...and the result is copied back to ScalarReg
+ def : Pat<(Scalar (pred_intr (Scalar ScalarReg:$prev),
+ (VTI.Vec MQPR:$vec),
+ (VTI.Pred VCCR:$pred))),
+ (COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR),
+ (VTI.Vec MQPR:$vec),
+ ARMVCCThen, (VTI.Pred VCCR:$pred)),
+ ScalarReg)>;
+ }
+}
-multiclass MVE_VMINMAXNMAV_fty<string iname, bit bit_7, list<dag> pattern=[]> {
- def f32 : MVE_VMINMAXNMV<iname, "f32", 0b0, 0b0, bit_7, pattern>;
- def f16 : MVE_VMINMAXNMV<iname, "f16", 0b1, 0b0, bit_7, pattern>;
+multiclass MVE_VMINMAXNMV_fty<string iname, bit notAbs, bit isMin, // instantiates MVE_VMINMAXNMV_p for f32 and f16
+ string intrBase> {
+ defm f32 : MVE_VMINMAXNMV_p<iname, notAbs, isMin, MVE_v4f32, intrBase,
+ f32, SPR>; // f32 scalar in the SPR register class
+ defm f16 : MVE_VMINMAXNMV_p<iname, notAbs, isMin, MVE_v8f16, intrBase,
+ f16, HPR>; // f16 scalar in the HPR register class
}
-defm MVE_VMINNMAV : MVE_VMINMAXNMAV_fty<"vminnmav", 0b1>;
-defm MVE_VMAXNMAV : MVE_VMINMAXNMAV_fty<"vmaxnmav", 0b0>;
+defm MVE_VMINNMV : MVE_VMINMAXNMV_fty<"vminnmv", 1, 1, "int_arm_mve_minnmv">; // args: iname, notAbs, isMin, intrinsic base name
+defm MVE_VMAXNMV : MVE_VMINMAXNMV_fty<"vmaxnmv", 1, 0, "int_arm_mve_maxnmv">;
+defm MVE_VMINNMAV: MVE_VMINMAXNMV_fty<"vminnmav", 0, 1, "int_arm_mve_minnmav">; // the "a" mnemonics pass notAbs = 0
+defm MVE_VMAXNMAV: MVE_VMINMAXNMV_fty<"vmaxnmav", 0, 0, "int_arm_mve_maxnmav">;
class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
bit bit_17, bit bit_7, list<dag> pattern=[]>
@@ -762,33 +816,40 @@ class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
let Inst{6-5} = 0b00;
let Inst{3-1} = Qm{2-0};
let Inst{0} = 0b0;
+ let horizontalReduction = 1;
}
-multiclass MVE_VMINMAXV_p<string iname, bit bit_17, bit bit_7,
- MVEVectorVTInfo VTI, Intrinsic intr> {
+multiclass MVE_VMINMAXV_p<string iname, bit notAbs, bit isMin, // defines instruction NAME and patterns for its unpredicated and predicated intrinsics
+ MVEVectorVTInfo VTI, string intrBaseName> {
def "": MVE_VMINMAXV<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
- bit_17, bit_7>;
- defvar Inst = !cast<Instruction>(NAME);
+ notAbs, isMin>; // notAbs/isMin fill the bit_17/bit_7 parameters of MVE_VMINMAXV
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar unpred_intr = !cast<Intrinsic>(intrBaseName); // intrinsics are looked up by name
+ defvar pred_intr = !cast<Intrinsic>(intrBaseName#"_predicated");
+ defvar base_args = (? (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)); // operator-less dag, combined with the intrinsic via !con below
+ defvar args = !if(notAbs, !con(base_args, (? (i32 VTI.Unsigned))), // when notAbs is set, an extra i32 VTI.Unsigned operand is appended
+ base_args);
- let Predicates = [HasMVEInt] in
- def _pat : Pat<(i32 (intr (i32 rGPR:$prev), (VTI.Vec MQPR:$vec))),
- (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 !con(args, (unpred_intr))), // !con supplies the intrinsic as the operator of the args dag
+ (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
+ def : Pat<(i32 !con(args, (pred_intr (VTI.Pred VCCR:$pred)))), // the predicate is appended after args
+ (i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec),
+ ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ }
}
-multiclass MVE_VMINMAXV_ty<string iname, bit bit_7,
- Intrinsic intr_s, Intrinsic intr_u> {
- defm s8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16s8, intr_s>;
- defm s16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8s16, intr_s>;
- defm s32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4s32, intr_s>;
- defm u8 : MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v16u8, intr_u>;
- defm u16: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v8u16, intr_u>;
- defm u32: MVE_VMINMAXV_p<iname, 1, bit_7, MVE_v4u32, intr_u>;
+multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> { // signed and unsigned 8/16/32-bit variants, all with notAbs = 1
+ defm s8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16s8, intrBaseName>;
+ defm s16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8s16, intrBaseName>;
+ defm s32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4s32, intrBaseName>;
+ defm u8 : MVE_VMINMAXV_p<iname, 1, isMin, MVE_v16u8, intrBaseName>; // same intrinsic base name as the signed variants
+ defm u16: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v8u16, intrBaseName>;
+ defm u32: MVE_VMINMAXV_p<iname, 1, isMin, MVE_v4u32, intrBaseName>;
}
-defm MVE_VMINV : MVE_VMINMAXV_ty<
- "vminv", 0b1, int_arm_mve_minv_s, int_arm_mve_minv_u>;
-defm MVE_VMAXV : MVE_VMINMAXV_ty<
- "vmaxv", 0b0, int_arm_mve_maxv_s, int_arm_mve_maxv_u>;
+defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">; // args: iname, isMin, intrinsic base name
+defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0, "int_arm_mve_maxv">;
let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))),
@@ -819,14 +880,14 @@ let Predicates = [HasMVEInt] in {
}
-multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> {
- def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b0, bit_7>;
- def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>;
- def s32 : MVE_VMINMAXV<iname, "s32", 0b0, 0b10, 0b0, bit_7>;
+multiclass MVE_VMINMAXAV_ty<string iname, bit isMin, string intrBaseName> { // signed-only variants with notAbs = 0
+ defm s8 : MVE_VMINMAXV_p<iname, 0, isMin, MVE_v16s8, intrBaseName>;
+ defm s16: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v8s16, intrBaseName>;
+ defm s32: MVE_VMINMAXV_p<iname, 0, isMin, MVE_v4s32, intrBaseName>;
}
-defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
-defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
+defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 1, "int_arm_mve_minav">; // args: iname, isMin, intrinsic base name
+defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0, "int_arm_mve_maxav">;
class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
@@ -847,6 +908,12 @@ class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
let Inst{5} = A;
let Inst{3-1} = Qm{2-0};
let Inst{0} = bit_0;
+ let horizontalReduction = 1;
+ // Allow tail predication for non-exchanging versions. As this is also a
+ // horizontalReduction, ARMLowOverheadLoops will also have to check that
+ // the vector operands contain zeros in their false lanes for the instruction
+ // to be properly valid.
+ let validForTailPredication = !eq(X, 0);
}
multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
@@ -932,6 +999,58 @@ defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v16s8, 0b0, 0b1>;
defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v8s16, 0b0, 0b0>;
defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v4s32, 0b1, 0b0>;
+def SDTVecReduce2 : SDTypeProfile<1, 2, [ // VMLAV: one integer result (0), two vector operands (1, 2)
+ SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>
+]>;
+def SDTVecReduce2L : SDTypeProfile<2, 2, [ // VMLALV: two integer results (0, 1), two vector operands (2, 3)
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>, SDTCisVec<3>
+]>;
+def SDTVecReduce2LA : SDTypeProfile<2, 4, [ // VMLALVA: two integer results, two integer accumulator operands (2, 3), two vector operands (4, 5)
+ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
+ SDTCisVec<4>, SDTCisVec<5>
+]>;
+def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>; // s/u node pairs; selected to the s/u instruction variants in the patterns of this file
+def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>;
+def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>; // two-result form
+def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>;
+def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>; // two-result form with accumulator inputs
+def ARMVMLALVAu : SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>;
+
+let Predicates = [HasMVEInt] in { // VMLADAV selection: plain reductions first, then forms that add into a tGPREven accumulator
+ def : Pat<(i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))), // generic vecreduce_add(mul) selects the u-suffixed instruction
+ (i32 (MVE_VMLADAVu32 $src1, $src2))>;
+ def : Pat<(i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
+ (i32 (MVE_VMLADAVu16 $src1, $src2))>;
+ def : Pat<(i32 (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), // ARMVMLAVs/ARMVMLAVu select the matching s/u instruction
+ (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
+ (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
+ (i32 (MVE_VMLADAVu8 $src1, $src2))>;
+ def : Pat<(i32 (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(i32 (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
+ (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+
+ def : Pat<(i32 (add (i32 (vecreduce_add (mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)))), // accumulating forms: the scalar addend becomes the first instruction operand
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau32 $src3, $src1, $src2))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau16 $src3, $src1, $src2))>;
+ def : Pat<(i32 (add (ARMVMLAVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (add (ARMVMLAVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
+ def : Pat<(i32 (add (i32 (vecreduce_add (mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)))),
+ (i32 tGPREven:$src3))),
+ (i32 (MVE_VMLADAVau8 $src3, $src1, $src2))>;
+ def : Pat<(i32 (add (ARMVMLAVs (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+ def : Pat<(i32 (add (ARMVMLAVu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)), tGPREven:$Rd)),
+ (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
+}
+
// vmlav aliases vmladav
foreach acc = ["", "a"] in {
foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in {
@@ -963,6 +1082,14 @@ class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
let Inst{5} = A;
let Inst{3-1} = Qm{2-0};
let Inst{0} = bit_0;
+ let horizontalReduction = 1;
+ // Allow tail predication for non-exchanging versions. As this is also a
+ // horizontalReduction, ARMLowOverheadLoops will also have to check that
+ // the vector operands contain zeros in their false lanes for the instruction
+ // to be properly valid.
+ let validForTailPredication = !eq(X, 0);
+
+ let hasSideEffects = 0;
}
multiclass MVE_VMLALDAVBase_A<string iname, string x, string suffix,
@@ -1023,6 +1150,26 @@ multiclass MVE_VMLALDAV_multi<string suffix, bit sz, list<dag> pattern=[]> {
defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>;
defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>;
+let Predicates = [HasMVEInt] in { // VMLALDAV selection for the ARMVMLALV* nodes, v4i32 and v8i16 inputs only
+ def : Pat<(ARMVMLALVs (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)), // non-accumulating: two vector operands
+ (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
+ (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVs (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
+ (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
+ (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
+
+ def : Pat<(ARMVMLALVAs tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)), // accumulating: even/odd GPR pair precedes the vectors
+ (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVAu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
+ (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVAs tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
+ (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
+ def : Pat<(ARMVMLALVAu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)),
+ (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))>;
+}
+
// vmlalv aliases vmlaldav
foreach acc = ["", "a"] in {
foreach suffix = ["s16", "s32", "u16", "u32"] in {
@@ -1244,28 +1391,29 @@ let Predicates = [HasMVEInt] in {
(v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>;
}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
- (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
- def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
- (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
- def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV64_8 (v16i8 MQPR:$src)))>;
+multiclass MVE_VREV_basic_patterns<int revbits, list<MVEVectorVTInfo> VTIs, // emits unpredicated and predicated VREV patterns for every type in VTIs
+ Instruction Inst> {
+ defvar unpred_op = !cast<SDNode>("ARMvrev" # revbits); // resolves the SDNode by name, e.g. ARMvrev64 for revbits = 64
- def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
- (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
- def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV32_8 (v16i8 MQPR:$src)))>;
+ foreach VTI = VTIs in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src)))>;
+ def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src), // the predicated intrinsic takes revbits as a literal operand
+ revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>; // $inactive is forwarded as the final instruction operand
+ }
+}
+
+let Predicates = [HasMVEInt] in { // args to each defm: revbits, vector types covered, instruction selected
+ defm: MVE_VREV_basic_patterns<64, [MVE_v4i32, MVE_v4f32], MVE_VREV64_32>; // integer and FP types of equal lane width share one instruction
+ defm: MVE_VREV_basic_patterns<64, [MVE_v8i16, MVE_v8f16], MVE_VREV64_16>;
+ defm: MVE_VREV_basic_patterns<64, [MVE_v16i8 ], MVE_VREV64_8>;
- def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
- (v16i8 (MVE_VREV16_8 (v16i8 MQPR:$src)))>;
+ defm: MVE_VREV_basic_patterns<32, [MVE_v8i16, MVE_v8f16], MVE_VREV32_16>;
+ defm: MVE_VREV_basic_patterns<32, [MVE_v16i8 ], MVE_VREV32_8>;
- def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
- (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
- def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
- (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
- def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
- (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+ defm: MVE_VREV_basic_patterns<16, [MVE_v16i8 ], MVE_VREV16_8>;
}
def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
@@ -1280,14 +1428,14 @@ def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
}
let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (vnotq (v16i8 MQPR:$val1))),
- (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
- def : Pat<(v8i16 (vnotq (v8i16 MQPR:$val1))),
- (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
- def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
- (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
- def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
- (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
+ foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in { // one unpredicated and one predicated VMVN pattern per integer vector type
+ def : Pat<(VTI.Vec (vnotq (VTI.Vec MQPR:$val1))),
+ (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1)))>;
+ def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1), // predicated intrinsic: source, predicate, inactive-lane vector
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
}
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
@@ -1383,10 +1531,10 @@ defm : MVE_bit_op_with_inv<MVE_v8i16, or, int_arm_mve_orn_predicated, MVE_VORN>;
defm : MVE_bit_op_with_inv<MVE_v4i32, or, int_arm_mve_orn_predicated, MVE_VORN>;
defm : MVE_bit_op_with_inv<MVE_v2i64, or, int_arm_mve_orn_predicated, MVE_VORN>;
-class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
+class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
: MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
- bits<8> imm;
+ bits<12> imm;
bits<4> Qd;
let Inst{28} = imm{7};
@@ -1396,66 +1544,59 @@ class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
let Inst{18-16} = imm{6-4};
let Inst{15-13} = Qd{2-0};
let Inst{12} = 0b0;
- let Inst{11-8} = cmode;
+ let Inst{11} = halfword;
+ let Inst{10} = !if(halfword, 0, imm{10});
+ let Inst{9} = imm{9};
+ let Inst{8} = 0b1;
let Inst{7-6} = 0b01;
let Inst{4} = 0b1;
let Inst{3-0} = imm{3-0};
}
-class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type>
- : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
- let Inst{5} = 0b0;
- let validForTailPredication = 1;
-}
+multiclass MVE_bit_cmode_p<string iname, bit opcode, // defines the immediate-form instruction NAME and its ISel patterns
+ MVEVectorVTInfo VTI, Operand imm_type, SDNode op> {
+ def "" : MVE_bit_cmode<iname, VTI.Suffix, VTI.Size{0}, // low bit of VTI.Size is passed as MVE_bit_cmode's halfword argument
+ (ins MQPR:$Qd_src, imm_type:$imm)> {
+ let Inst{5} = opcode; // callers in this file pass 0 for vorr and 1 for vbic
+ let validForTailPredication = 1;
+ }
-def MVE_VORRIZ0v4i32 : MVE_VORR<"i32", 0b0001, expzero00>;
-def MVE_VORRIZ0v8i16 : MVE_VORR<"i16", 0b1001, expzero00>;
-def MVE_VORRIZ8v4i32 : MVE_VORR<"i32", 0b0011, expzero08>;
-def MVE_VORRIZ8v8i16 : MVE_VORR<"i16", 0b1011, expzero08>;
-def MVE_VORRIZ16v4i32 : MVE_VORR<"i32", 0b0101, expzero16>;
-def MVE_VORRIZ24v4i32 : MVE_VORR<"i32", 0b0111, expzero24>;
-
-def MVE_VORNIZ0v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ0v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ8v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ8v8i16 : MVEAsmPseudo<"vorn${vp}.i16\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ16v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
-def MVE_VORNIZ24v4i32 : MVEAsmPseudo<"vorn${vp}.i32\t$Qd, $imm",
- (ins MQPR:$Qd_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qd)>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar UnpredPat = (VTI.Vec (op (VTI.Vec MQPR:$src), timm:$simm)); // shared source dag, reused inside the vselect pattern below
-def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
- (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<UnpredPat, (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
+ def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred), // a vselect that falls back to the unmodified $src selects the predicated instruction
+ UnpredPat, (VTI.Vec MQPR:$src))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
+ ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ }
+}
-class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type>
- : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
- let Inst{5} = 0b1;
- let validForTailPredication = 1;
+multiclass MVE_VORRimm<MVEVectorVTInfo VTI, Operand imm_type> { // immediate vorr: MVE_bit_cmode_p with opcode bit 0 and the ARMvorrImm node
+ defm "": MVE_bit_cmode_p<"vorr", 0, VTI, imm_type, ARMvorrImm>;
+}
+multiclass MVE_VBICimm<MVEVectorVTInfo VTI, Operand imm_type> { // immediate vbic: MVE_bit_cmode_p with opcode bit 1 and the ARMvbicImm node
+ defm "": MVE_bit_cmode_p<"vbic", 1, VTI, imm_type, ARMvbicImm>;
}
-def MVE_VBICIZ0v4i32 : MVE_VBIC<"i32", 0b0001, expzero00>;
-def MVE_VBICIZ0v8i16 : MVE_VBIC<"i16", 0b1001, expzero00>;
-def MVE_VBICIZ8v4i32 : MVE_VBIC<"i32", 0b0011, expzero08>;
-def MVE_VBICIZ8v8i16 : MVE_VBIC<"i16", 0b1011, expzero08>;
-def MVE_VBICIZ16v4i32 : MVE_VBIC<"i32", 0b0101, expzero16>;
-def MVE_VBICIZ24v4i32 : MVE_VBIC<"i32", 0b0111, expzero24>;
-
-def MVE_VANDIZ0v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero00inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ0v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero00inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ8v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero08inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ8v8i16 : MVEAsmPseudo<"vand${vp}.i16\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero08inv16:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ16v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero16inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
-def MVE_VANDIZ24v4i32 : MVEAsmPseudo<"vand${vp}.i32\t$Qda, $imm",
- (ins MQPR:$Qda_src, expzero24inv32:$imm, vpred_n:$vp), (outs MQPR:$Qda)>;
+defm MVE_VORRimmi16 : MVE_VORRimm<MVE_v8i16, nImmSplatI16>; // one immediate instruction per element size replaces the removed per-shift MVE_VORRIZ*/MVE_VBICIZ* defs
+defm MVE_VORRimmi32 : MVE_VORRimm<MVE_v4i32, nImmSplatI32>;
+defm MVE_VBICimmi16 : MVE_VBICimm<MVE_v8i16, nImmSplatI16>;
+defm MVE_VBICimmi32 : MVE_VBICimm<MVE_v4i32, nImmSplatI32>;
+
+def MVE_VORNimmi16 : MVEInstAlias<"vorn${vp}.i16\t$Qd, $imm", // vorn #imm is an alias of immediate vorr; presumably nImmSplatNot* accepts the inverted immediate - confirm
+ (MVE_VORRimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
+def MVE_VORNimmi32 : MVEInstAlias<"vorn${vp}.i32\t$Qd, $imm",
+ (MVE_VORRimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
+
+def MVE_VANDimmi16 : MVEInstAlias<"vand${vp}.i16\t$Qd, $imm", // vand #imm is an alias of immediate vbic with the same nImmSplatNot* operand classes
+ (MVE_VBICimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
+def MVE_VANDimmi32 : MVEInstAlias<"vand${vp}.i32\t$Qd, $imm",
+ (MVE_VBICimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
+
+def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm", // register vmov is an alias of MVE_VORR with $Qm as both sources
+ (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
class MVE_VMOV_lane_direction {
bit bit_20;
@@ -1494,6 +1635,8 @@ class MVE_VMOV_lane<string suffix, bit U, dag indexop,
let Inst{11-8} = 0b1011;
let Inst{7} = Qd{3};
let Inst{4-0} = 0b10000;
+
+ let hasSideEffects = 0;
}
class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
@@ -1557,10 +1700,14 @@ let Predicates = [HasMVEInt] in {
(MVE_VMOV_from_lane_s8 MQPR:$src, imm:$lane)>;
def : Pat<(ARMvgetlanes (v8i16 MQPR:$src), imm:$lane),
(MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
+ def : Pat<(ARMvgetlanes (v8f16 MQPR:$src), imm:$lane),
+ (MVE_VMOV_from_lane_s16 MQPR:$src, imm:$lane)>;
def : Pat<(ARMvgetlaneu (v16i8 MQPR:$src), imm:$lane),
(MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane)>;
def : Pat<(ARMvgetlaneu (v8i16 MQPR:$src), imm:$lane),
(MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
+ def : Pat<(ARMvgetlaneu (v8f16 MQPR:$src), imm:$lane),
+ (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane)>;
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
(MVE_VMOV_to_lane_8 (v16i8 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
@@ -1575,8 +1722,8 @@ let Predicates = [HasMVEInt] in {
def : Pat<(insertelt (v4f32 MQPR:$src1), (f32 SPR:$src2), imm:$lane),
(INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS MQPR:$src1, MQPR)), SPR:$src2, (SSubReg_f32_reg imm:$lane))>;
- def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane),
- (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>;
+ def : Pat<(insertelt (v8f16 MQPR:$src1), (f16 HPR:$src2), imm:$lane),
+ (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS (f16 HPR:$src2), rGPR), imm:$lane)>;
def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane),
(EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>;
def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane),
@@ -1588,8 +1735,8 @@ let Predicates = [HasMVEInt] in {
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v4f32 (scalar_to_vector GPR:$src)),
(MVE_VMOV_to_lane_32 (v4f32 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
- def : Pat<(v8f16 (scalar_to_vector HPR:$src)),
- (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
+ def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), (f16 HPR:$src), ssub_0)>;
def : Pat<(v8f16 (scalar_to_vector GPR:$src)),
(MVE_VMOV_to_lane_16 (v8f16 (IMPLICIT_DEF)), rGPR:$src, (i32 0))>;
}
@@ -1882,6 +2029,26 @@ class MVE_VRHADD_Base<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
+def addnuw : PatFrag<(ops node:$lhs, node:$rhs), // add, only if hasNoUnsignedWrap()
+ (add node:$lhs, node:$rhs), [{
+ return N->getFlags().hasNoUnsignedWrap();
+}]>;
+
+def addnsw : PatFrag<(ops node:$lhs, node:$rhs), // add, only if hasNoSignedWrap()
+ (add node:$lhs, node:$rhs), [{
+ return N->getFlags().hasNoSignedWrap();
+}]>;
+
+def subnuw : PatFrag<(ops node:$lhs, node:$rhs), // sub, only if hasNoUnsignedWrap()
+ (sub node:$lhs, node:$rhs), [{
+ return N->getFlags().hasNoUnsignedWrap();
+}]>;
+
+def subnsw : PatFrag<(ops node:$lhs, node:$rhs), // sub, only if hasNoSignedWrap()
+ (sub node:$lhs, node:$rhs), [{
+ return N->getFlags().hasNoSignedWrap();
+}]>;
+
multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
SDNode unpred_op, Intrinsic pred_int> {
def "" : MVE_VRHADD_Base<VTI.Suffix, VTI.Unsigned, VTI.Size>;
@@ -1913,6 +2080,37 @@ defm MVE_VRHADDu8 : MVE_VRHADD<MVE_v16u8>;
defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
+// Rounding Halving Add performs the arithmetic operation with an extra bit of
+// precision, before performing the shift, to avoid clipping errors. We're not
+// modelling that here with these patterns, but we're using no wrap forms of
+// add to ensure that the extra bit of information is not needed for the
+// arithmetic or the rounding.
+def : Pat<(v16i8 (ARMvshrsImm (addnsw (addnsw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvmovImm (i32 3585)))),
+ (i32 1))),
+ (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v8i16 (ARMvshrsImm (addnsw (addnsw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvmovImm (i32 2049)))),
+ (i32 1))),
+ (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v4i32 (ARMvshrsImm (addnsw (addnsw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvmovImm (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v16i8 (ARMvshruImm (addnuw (addnuw (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+ (v16i8 (ARMvmovImm (i32 3585)))),
+ (i32 1))),
+ (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v8i16 (ARMvshruImm (addnuw (addnuw (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+ (v8i16 (ARMvmovImm (i32 2049)))),
+ (i32 1))),
+ (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v4i32 (ARMvshruImm (addnuw (addnuw (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+ (v4i32 (ARMvmovImm (i32 1)))),
+ (i32 1))),
+ (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
+
+
class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
bits<2> size, list<dag> pattern=[]>
: MVE_int<iname, suffix, size, pattern> {
@@ -1936,7 +2134,8 @@ class MVE_VHSUB_<string suffix, bit U, bits<2> size,
: MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>;
multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode unpred_op, Intrinsic pred_int, PatFrag add_op,
+ SDNode shift_op> {
def "" : MVE_VHADD_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
@@ -1945,6 +2144,9 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
+ (Inst MQPR:$Qm, MQPR:$Qn)>;
+
// Predicated add-and-divide-by-two
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
@@ -1954,18 +2156,24 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
}
}
-multiclass MVE_VHADD<MVEVectorVTInfo VTI>
- : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated>;
+multiclass MVE_VHADD<MVEVectorVTInfo VTI, PatFrag add_op, SDNode shift_op>
+ : MVE_VHADD_m<VTI, int_arm_mve_vhadd, int_arm_mve_hadd_predicated, add_op,
+ shift_op>;
-defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8>;
-defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16>;
-defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32>;
-defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8>;
-defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16>;
-defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32>;
+// Halving add/sub perform the arithmetic operation with an extra bit of
+// precision, before performing the shift, to avoid clipping errors. We're not
+// modelling that here with these patterns, but we're using no wrap forms of
+// add/sub to ensure that the extra bit of information is not needed.
+defm MVE_VHADDs8 : MVE_VHADD<MVE_v16s8, addnsw, ARMvshrsImm>;
+defm MVE_VHADDs16 : MVE_VHADD<MVE_v8s16, addnsw, ARMvshrsImm>;
+defm MVE_VHADDs32 : MVE_VHADD<MVE_v4s32, addnsw, ARMvshrsImm>;
+defm MVE_VHADDu8 : MVE_VHADD<MVE_v16u8, addnuw, ARMvshruImm>;
+defm MVE_VHADDu16 : MVE_VHADD<MVE_v8u16, addnuw, ARMvshruImm>;
+defm MVE_VHADDu32 : MVE_VHADD<MVE_v4u32, addnuw, ARMvshruImm>;
multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
- SDNode unpred_op, Intrinsic pred_int> {
+ SDNode unpred_op, Intrinsic pred_int, PatFrag sub_op,
+ SDNode shift_op> {
def "" : MVE_VHSUB_<VTI.Suffix, VTI.Unsigned, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
@@ -1975,6 +2183,10 @@ multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
(i32 VTI.Unsigned))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ def : Pat<(VTI.Vec (shift_op (sub_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))),
+ (Inst MQPR:$Qm, MQPR:$Qn)>;
+
+
// Predicated subtract-and-divide-by-two
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
(i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
@@ -1985,15 +2197,16 @@ multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
}
}
-multiclass MVE_VHSUB<MVEVectorVTInfo VTI>
- : MVE_VHSUB_m<VTI, int_arm_mve_vhsub, int_arm_mve_hsub_predicated>;
+multiclass MVE_VHSUB<MVEVectorVTInfo VTI, PatFrag sub_op, SDNode shift_op>
+ : MVE_VHSUB_m<VTI, int_arm_mve_vhsub, int_arm_mve_hsub_predicated, sub_op,
+ shift_op>;
-defm MVE_VHSUBs8 : MVE_VHSUB<MVE_v16s8>;
-defm MVE_VHSUBs16 : MVE_VHSUB<MVE_v8s16>;
-defm MVE_VHSUBs32 : MVE_VHSUB<MVE_v4s32>;
-defm MVE_VHSUBu8 : MVE_VHSUB<MVE_v16u8>;
-defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16>;
-defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32>;
+defm MVE_VHSUBs8 : MVE_VHSUB<MVE_v16s8, subnsw, ARMvshrsImm>;
+defm MVE_VHSUBs16 : MVE_VHSUB<MVE_v8s16, subnsw, ARMvshrsImm>;
+defm MVE_VHSUBs32 : MVE_VHSUB<MVE_v4s32, subnsw, ARMvshrsImm>;
+defm MVE_VHSUBu8 : MVE_VHSUB<MVE_v16u8, subnuw, ARMvshruImm>;
+defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16, subnuw, ARMvshruImm>;
+defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32, subnuw, ARMvshruImm>;
class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
@@ -2028,24 +2241,37 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v4i32 (ARMvdup (i32 rGPR:$elem))),
(MVE_VDUP32 rGPR:$elem)>;
- def : Pat<(v4i32 (ARMvduplane (v4i32 MQPR:$src), imm:$lane)),
- (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
- // For the 16-bit and 8-bit vduplanes we don't care about the signedness
- // of the lane move operation as we only want the lowest 8/16 bits anyway.
- def : Pat<(v8i16 (ARMvduplane (v8i16 MQPR:$src), imm:$lane)),
- (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
- def : Pat<(v16i8 (ARMvduplane (v16i8 MQPR:$src), imm:$lane)),
- (MVE_VDUP8 (MVE_VMOV_from_lane_u8 MQPR:$src, imm:$lane))>;
-
- def : Pat<(v4f32 (ARMvdup (f32 SPR:$elem))),
- (v4f32 (MVE_VDUP32 (i32 (COPY_TO_REGCLASS (f32 SPR:$elem), rGPR))))>;
- def : Pat<(v8f16 (ARMvdup (f16 HPR:$elem))),
- (v8f16 (MVE_VDUP16 (i32 (COPY_TO_REGCLASS (f16 HPR:$elem), rGPR))))>;
+ def : Pat<(v8f16 (ARMvdup (i32 rGPR:$elem))),
+ (MVE_VDUP16 rGPR:$elem)>;
+ def : Pat<(v4f32 (ARMvdup (i32 rGPR:$elem))),
+ (MVE_VDUP32 rGPR:$elem)>;
- def : Pat<(v4f32 (ARMvduplane (v4f32 MQPR:$src), imm:$lane)),
- (MVE_VDUP32 (MVE_VMOV_from_lane_32 MQPR:$src, imm:$lane))>;
- def : Pat<(v8f16 (ARMvduplane (v8f16 MQPR:$src), imm:$lane)),
- (MVE_VDUP16 (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane))>;
+ // Match a vselect with an ARMvdup as a predicated MVE_VDUP
+ def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred),
+ (v16i8 (ARMvdup (i32 rGPR:$elem))),
+ (v16i8 MQPR:$inactive))),
+ (MVE_VDUP8 rGPR:$elem, ARMVCCThen, (v16i1 VCCR:$pred),
+ (v16i8 MQPR:$inactive))>;
+ def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred),
+ (v8i16 (ARMvdup (i32 rGPR:$elem))),
+ (v8i16 MQPR:$inactive))),
+ (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
+ (v8i16 MQPR:$inactive))>;
+ def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred),
+ (v4i32 (ARMvdup (i32 rGPR:$elem))),
+ (v4i32 MQPR:$inactive))),
+ (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
+ (v4i32 MQPR:$inactive))>;
+ def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred),
+ (v4f32 (ARMvdup (i32 rGPR:$elem))),
+ (v4f32 MQPR:$inactive))),
+ (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
+ (v4f32 MQPR:$inactive))>;
+ def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred),
+ (v8f16 (ARMvdup (i32 rGPR:$elem))),
+ (v8f16 MQPR:$inactive))),
+ (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
+ (v8f16 MQPR:$inactive))>;
}
@@ -2079,32 +2305,43 @@ class MVE_VCLSCLZ<string iname, string suffix, bits<2> size,
let validForTailPredication = 1;
}
-def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>;
-def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
-def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
+multiclass MVE_VCLSCLZ_p<string opname, bit opcode, MVEVectorVTInfo VTI,
+ SDNode unpred_op> {
+ def "": MVE_VCLSCLZ<"v"#opname, VTI.Suffix, VTI.Size, opcode>;
-def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>;
-def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
-def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_"#opname#"_predicated");
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))),
- (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>;
- def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))),
- (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
- def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))),
- (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
}
+defm MVE_VCLSs8 : MVE_VCLSCLZ_p<"cls", 0, MVE_v16s8, int_arm_mve_vcls>;
+defm MVE_VCLSs16 : MVE_VCLSCLZ_p<"cls", 0, MVE_v8s16, int_arm_mve_vcls>;
+defm MVE_VCLSs32 : MVE_VCLSCLZ_p<"cls", 0, MVE_v4s32, int_arm_mve_vcls>;
+
+defm MVE_VCLZs8 : MVE_VCLSCLZ_p<"clz", 1, MVE_v16i8, ctlz>;
+defm MVE_VCLZs16 : MVE_VCLSCLZ_p<"clz", 1, MVE_v8i16, ctlz>;
+defm MVE_VCLZs32 : MVE_VCLSCLZ_p<"clz", 1, MVE_v4i32, ctlz>;
+
class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
- list<dag> pattern=[]>
+ bit saturate, list<dag> pattern=[]>
: MVEIntSingleSrc<iname, suffix, size, pattern> {
let Inst{28} = 0b1;
let Inst{25-23} = 0b111;
let Inst{21-20} = 0b11;
- let Inst{17-16} = 0b01;
- let Inst{12-8} = 0b00011;
+ let Inst{17} = 0b0;
+ let Inst{16} = !eq(saturate, 0);
+ let Inst{12-11} = 0b00;
+ let Inst{10} = saturate;
+ let Inst{9-8} = 0b11;
let Inst{7} = negate;
let Inst{6} = 0b1;
let Inst{4} = 0b0;
@@ -2112,61 +2349,40 @@ class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
let validForTailPredication = 1;
}
-def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>;
-def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
-def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
- (v16i8 (MVE_VABSs8 $v))>;
- def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
- (v8i16 (MVE_VABSs16 $v))>;
- def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
- (v4i32 (MVE_VABSs32 $v))>;
-}
+multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
+ SDNode unpred_op, Intrinsic pred_int,
+ MVEVectorVTInfo VTI> {
+ def "" : MVE_VABSNEG_int<iname, VTI.Suffix, VTI.Size, negate, saturate>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VNEGs8 : MVE_VABSNEG_int<"vneg", "s8", 0b00, 0b1>;
-def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
-def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
+ let Predicates = [HasMVEInt] in {
+ // VQABS and VQNEG have more difficult isel patterns defined elsewhere
+ if !eq(saturate, 0) then {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
+ }
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
- (v16i8 (MVE_VNEGs8 $v))>;
- def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
- (v8i16 (MVE_VNEGs16 $v))>;
- def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
- (v4i32 (MVE_VNEGs32 $v))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ }
}
-class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
- bit negate, list<dag> pattern=[]>
- : MVEIntSingleSrc<iname, suffix, size, pattern> {
-
- let Inst{28} = 0b1;
- let Inst{25-23} = 0b111;
- let Inst{21-20} = 0b11;
- let Inst{17-16} = 0b00;
- let Inst{12-8} = 0b00111;
- let Inst{7} = negate;
- let Inst{6} = 0b1;
- let Inst{4} = 0b0;
- let Inst{0} = 0b0;
- let validForTailPredication = 1;
+foreach VTI = [ MVE_v16s8, MVE_v8s16, MVE_v4s32 ] in {
+ defm "MVE_VABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vabs", 0, 0, abs, int_arm_mve_abs_predicated, VTI>;
+ defm "MVE_VQABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vqabs", 0, 1, ?, int_arm_mve_qabs_predicated, VTI>;
+ defm "MVE_VNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vneg", 1, 0, vnegq, int_arm_mve_neg_predicated, VTI>;
+ defm "MVE_VQNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+ "vqneg", 1, 1, ?, int_arm_mve_qneg_predicated, VTI>;
}
-def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>;
-def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
-def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
-
-def MVE_VQNEGs8 : MVE_VQABSNEG<"vqneg", "s8", 0b00, 0b1>;
-def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
-def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
-
// int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
// zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
- dag zero_vec, MVE_VQABSNEG vqabs_instruction,
- MVE_VQABSNEG vqneg_instruction> {
+ dag zero_vec, MVE_VABSNEG_int vqabs_instruction,
+ MVE_VABSNEG_int vqneg_instruction> {
let Predicates = [HasMVEInt] in {
// The below tree can be replaced by a vqabs instruction, as it represents
// the following vectorized expression (r being the value in $reg):
@@ -2257,6 +2473,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VMOVimmi16 nImmSplatI16:$simm))>;
def : Pat<(v4i32 (ARMvmovImm timm:$simm)),
(v4i32 (MVE_VMOVimmi32 nImmVMOVI32:$simm))>;
+ def : Pat<(v2i64 (ARMvmovImm timm:$simm)),
+ (v2i64 (MVE_VMOVimmi64 nImmSplatI64:$simm))>;
def : Pat<(v8i16 (ARMvmvnImm timm:$simm)),
(v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm))>;
@@ -2265,6 +2483,15 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
(v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
+
+ def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+ MQPR:$inactive)),
+ (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm,
+ ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
+ def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+ MQPR:$inactive)),
+ (v4i32 (MVE_VMVNimmi32 nImmSplatI32:$simm,
+ ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
}
class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
@@ -2291,13 +2518,37 @@ class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
let validForTailPredication = 1;
}
-def MVE_VMAXAs8 : MVE_VMINMAXA<"vmaxa", "s8", 0b00, 0b0>;
-def MVE_VMAXAs16 : MVE_VMINMAXA<"vmaxa", "s16", 0b01, 0b0>;
-def MVE_VMAXAs32 : MVE_VMINMAXA<"vmaxa", "s32", 0b10, 0b0>;
+multiclass MVE_VMINMAXA_m<string iname, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int, bit bit_12> { // one instruction + two isel patterns
+ def "" : MVE_VMINMAXA<iname, VTI.Suffix, VTI.Size, bit_12>; // bit_12 is forwarded unchanged
+ defvar Inst = !cast<Instruction>(NAME); // the def "" above, looked up by NAME
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated v(min|max)a: unpred_op applied to $Qd and abs($Qm)
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qd), (abs (VTI.Vec MQPR:$Qm)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm)))>;
+
+ // Predicated v(min|max)a: pred_int maps to Inst under ARMVCCThen with $mask
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
+}
+
+multiclass MVE_VMINA<MVEVectorVTInfo VTI>
+ : MVE_VMINMAXA_m<"vmina", VTI, umin, int_arm_mve_vmina_predicated, 0b1>;
+
+defm MVE_VMINAs8 : MVE_VMINA<MVE_v16s8>;
+defm MVE_VMINAs16 : MVE_VMINA<MVE_v8s16>;
+defm MVE_VMINAs32 : MVE_VMINA<MVE_v4s32>;
-def MVE_VMINAs8 : MVE_VMINMAXA<"vmina", "s8", 0b00, 0b1>;
-def MVE_VMINAs16 : MVE_VMINMAXA<"vmina", "s16", 0b01, 0b1>;
-def MVE_VMINAs32 : MVE_VMINMAXA<"vmina", "s32", 0b10, 0b1>;
+multiclass MVE_VMAXA<MVEVectorVTInfo VTI>
+ : MVE_VMINMAXA_m<"vmaxa", VTI, umax, int_arm_mve_vmaxa_predicated, 0b0>;
+
+defm MVE_VMAXAs8 : MVE_VMAXA<MVE_v16s8>;
+defm MVE_VMAXAs16 : MVE_VMAXA<MVE_v8s16>;
+defm MVE_VMAXAs32 : MVE_VMAXA<MVE_v4s32>;
// end of MVE Integer instructions
@@ -2334,7 +2585,7 @@ class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
let Inst{3-1} = Qm{2-0};
}
-class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
+class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U, bit top,
list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
iname, suffix, "$Qd, $Qm", vpred_r, "",
@@ -2344,25 +2595,36 @@ class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
let Inst{21} = 0b1;
let Inst{20-19} = sz{1-0};
let Inst{18-16} = 0b000;
+ let Inst{12} = top;
let Inst{11-6} = 0b111101;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let doubleWidthResult = 1;
}
-multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
- list<dag> pattern=[]> {
- def bh : MVE_VMOVL<!strconcat(iname, "b"), suffix, sz, U, pattern> {
- let Inst{12} = 0b0;
- }
- def th : MVE_VMOVL<!strconcat(iname, "t"), suffix, sz, U, pattern> {
- let Inst{12} = 0b1;
- }
+multiclass MVE_VMOVL_m<bit top, string chr, MVEVectorVTInfo OutVTI,
+ MVEVectorVTInfo InVTI> { // one VMOVL instruction + its predicated-intrinsic pattern
+ def "": MVE_VMOVL<"vmovl" # chr, InVTI.Suffix, OutVTI.Size, // mnemonic is "vmovl" followed by chr
+ InVTI.Unsigned, top>;
+ defvar Inst = !cast<Instruction>(NAME); // the def "" above, looked up by NAME
+
+ def : Pat<(OutVTI.Vec (int_arm_mve_vmovl_predicated (InVTI.Vec MQPR:$src),
+ (i32 InVTI.Unsigned), (i32 top), // must equal this instance's constants to match
+ (OutVTI.Pred VCCR:$pred), // predicate type comes from the output vector type
+ (OutVTI.Vec MQPR:$inactive))),
+ (OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen,
+ (OutVTI.Pred VCCR:$pred),
+ (OutVTI.Vec MQPR:$inactive)))>;
+}
-defm MVE_VMOVLs8 : MVE_VMOVL_shift_half<"vmovl", "s8", 0b01, 0b0>;
-defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>;
-defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
-defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
+defm MVE_VMOVLs8bh : MVE_VMOVL_m<0, "b", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLs8th : MVE_VMOVL_m<1, "t", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLu8bh : MVE_VMOVL_m<0, "b", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLu8th : MVE_VMOVL_m<1, "t", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLs16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLs16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLu16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8u16>;
+defm MVE_VMOVLu16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8u16>;
let Predicates = [HasMVEInt] in {
def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
@@ -2372,12 +2634,23 @@ let Predicates = [HasMVEInt] in {
def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8),
(MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>;
+ def : Pat<(sext_inreg (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src)))), v8i8),
+ (MVE_VMOVLs8th MQPR:$src)>;
+ def : Pat<(sext_inreg (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src)))), v4i16),
+ (MVE_VMOVLs16th MQPR:$src)>;
+
+ // zext_inreg 8 -> 16
+ def : Pat<(ARMvbicImm (v8i16 MQPR:$src), (i32 0xAFF)),
+ (MVE_VMOVLu8bh MQPR:$src)>;
// zext_inreg 16 -> 32
def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
(MVE_VMOVLu16bh MQPR:$src)>;
- // zext_inreg 8 -> 16
- def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
- (MVE_VMOVLu8bh MQPR:$src)>;
+ // Same zext_inreg with vrevs, picking the top half
+ def : Pat<(ARMvbicImm (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src)))), (i32 0xAFF)),
+ (MVE_VMOVLu8th MQPR:$src)>;
+ def : Pat<(and (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src)))),
+ (v4i32 (ARMvmovImm (i32 0xCFF)))),
+ (MVE_VMOVLu16th MQPR:$src)>;
}
@@ -2395,6 +2668,8 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
// For the MVE_VSHLL_patterns multiclass to refer to
Operand immediateType = immtype;
+
+ let doubleWidthResult = 1;
}
// The immediate VSHLL instructions accept shift counts from 1 up to
@@ -2438,6 +2713,7 @@ class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
let Inst{11-6} = 0b111000;
let Inst{4} = 0b0;
let Inst{0} = 0b1;
+ let doubleWidthResult = 1;
}
multiclass MVE_VSHLL_lw<string iname, string suffix, bits<2> sz, bit U,
@@ -2472,17 +2748,17 @@ multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), imm:$imm,
(i32 VTI.Unsigned), (i32 top),
- (VTI.Pred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))>;
def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
(i32 VTI.Unsigned), (i32 top),
- (VTI.Pred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
- (VTI.Pred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))>;
}
@@ -2509,6 +2785,8 @@ class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
let Inst{11-6} = 0b111111;
let Inst{4} = 0b0;
let Inst{0} = 0b1;
+ let validForTailPredication = 1;
+ let retainsPreviousHalfElement = 1;
}
def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
@@ -2550,6 +2828,8 @@ class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
let Inst{11-6} = 0b111111;
let Inst{4} = 0b0;
let Inst{0} = 0b0;
+ let validForTailPredication = 1;
+ let retainsPreviousHalfElement = 1;
}
def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
@@ -2598,6 +2878,8 @@ class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
let Inst{11-6} = 0b111101;
let Inst{4} = 0b0;
let Inst{0} = bit_0;
+ let validForTailPredication = 1;
+ let retainsPreviousHalfElement = 1;
}
multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
@@ -3131,41 +3413,34 @@ class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
}
-multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
- def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
- def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
- def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
- def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
- def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
- def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
-}
+multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
+ SDNode unpred_op> {
+ def "": MVE_VRINT<suffix, opcode, VTI.Suffix, VTI.Size>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_vrint"#suffix#"_predicated");
-defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
-defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+ (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ }
+}
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
- def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),
- (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
- def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
- (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
+multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> { // one MVE_VRINT_m per suffix letter
+ defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
+ defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
+ defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
+ defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
+ defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>; // opcode 0b100 is not instantiated here
+ defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>; // opcode 0b110 is not instantiated here
+}
+defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>;
+defm MVE_VRINTf32 : MVE_VRINT_ops<MVE_v4f32>;
+
class MVEFloatArithNeon<string iname, string suffix, bit size,
dag oops, dag iops, string ops,
vpred_ops vpred, string cstr, list<dag> pattern=[]>
@@ -3281,29 +3556,40 @@ class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
let Inst{8} = bit_8;
let Inst{7} = Qn{3};
let Inst{4} = bit_4;
+ let validForTailPredication = 1;
}
-def MVE_VFMAf32 : MVE_VADDSUBFMA_fp<"vfma", "f32", 0b0, 0b1, 0b0, 0b0,
- (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-def MVE_VFMAf16 : MVE_VADDSUBFMA_fp<"vfma", "f16", 0b1, 0b1, 0b0, 0b0,
- (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-
-def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1,
- (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
-def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1,
- (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
+ def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0b1, 0b0, fms,
+ (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = int_arm_mve_fma_predicated;
+ defvar m1 = (VTI.Vec MQPR:$m1);
+ defvar m2 = (VTI.Vec MQPR:$m2);
+ defvar add = (VTI.Vec MQPR:$add);
+ defvar pred = (VTI.Pred VCCR:$pred);
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
- (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
- def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
- (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
- def : Pat<(v8f16 (fma (fneg (v8f16 MQPR:$src1)), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
- (v8f16 (MVE_VFMSf16 $src3, $src1, $src2))>;
- def : Pat<(v4f32 (fma (fneg (v4f32 MQPR:$src1)), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
- (v4f32 (MVE_VFMSf32 $src3, $src1, $src2))>;
+ let Predicates = [HasMVEFloat] in {
+ if fms then {
+ def : Pat<(VTI.Vec (fma (fneg m1), m2, add)), (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (fma m1, (fneg m2), add)), (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ } else {
+ def : Pat<(VTI.Vec (fma m1, m2, add)), (Inst $add, $m1, $m2)>;
+ def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)),
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ }
+ }
}
+defm MVE_VFMAf32 : MVE_VFMA_fp_multi<"vfma", 0, MVE_v4f32>;
+defm MVE_VFMAf16 : MVE_VFMA_fp_multi<"vfma", 0, MVE_v8f16>;
+defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
+defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
+
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
SDNode unpred_op, Intrinsic pred_int> {
def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
@@ -3423,10 +3709,10 @@ defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
- Operand imm_operand_type, list<dag> pattern=[]>
+ Operand imm_operand_type>
: MVE_float<"vcvt", suffix,
(outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6),
- "$Qd, $Qm, $imm6", vpred_r, "", pattern> {
+ "$Qd, $Qm, $imm6", vpred_r, "", []> {
bits<4> Qd;
bits<6> imm6;
@@ -3468,14 +3754,43 @@ class MVE_VCVT_fix_f16<string suffix, bit U, bit op>
let Inst{20} = 0b1;
}
-def MVE_VCVTf16s16_fix : MVE_VCVT_fix_f16<"f16.s16", 0b0, 0b0>;
-def MVE_VCVTs16f16_fix : MVE_VCVT_fix_f16<"s16.f16", 0b0, 0b1>;
-def MVE_VCVTf16u16_fix : MVE_VCVT_fix_f16<"f16.u16", 0b1, 0b0>;
-def MVE_VCVTu16f16_fix : MVE_VCVT_fix_f16<"u16.f16", 0b1, 0b1>;
-def MVE_VCVTf32s32_fix : MVE_VCVT_fix_f32<"f32.s32", 0b0, 0b0>;
-def MVE_VCVTs32f32_fix : MVE_VCVT_fix_f32<"s32.f32", 0b0, 0b1>;
-def MVE_VCVTf32u32_fix : MVE_VCVT_fix_f32<"f32.u32", 0b1, 0b0>;
-def MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32<"u32.f32", 0b1, 0b1>;
+multiclass MVE_VCVT_fix_patterns<Instruction Inst, bit U, MVEVectorVTInfo DestVTI,
+ MVEVectorVTInfo SrcVTI> {
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(DestVTI.Vec (int_arm_mve_vcvt_fix
+ (i32 U), (SrcVTI.Vec MQPR:$Qm), imm:$scale)),
+ (DestVTI.Vec (Inst (SrcVTI.Vec MQPR:$Qm), imm:$scale))>;
+ def : Pat<(DestVTI.Vec (int_arm_mve_vcvt_fix_predicated (i32 U),
+ (DestVTI.Vec MQPR:$inactive),
+ (SrcVTI.Vec MQPR:$Qm),
+ imm:$scale,
+ (DestVTI.Pred VCCR:$mask))),
+ (DestVTI.Vec (Inst (SrcVTI.Vec MQPR:$Qm), imm:$scale,
+ ARMVCCThen, (DestVTI.Pred VCCR:$mask),
+ (DestVTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VCVT_fix_f32_m<bit U, bit op,
+ MVEVectorVTInfo DestVTI, MVEVectorVTInfo SrcVTI> {
+ def "" : MVE_VCVT_fix_f32<DestVTI.Suffix#"."#SrcVTI.Suffix, U, op>;
+ defm : MVE_VCVT_fix_patterns<!cast<Instruction>(NAME), U, DestVTI, SrcVTI>;
+}
+
+multiclass MVE_VCVT_fix_f16_m<bit U, bit op,
+ MVEVectorVTInfo DestVTI, MVEVectorVTInfo SrcVTI> {
+ def "" : MVE_VCVT_fix_f16<DestVTI.Suffix#"."#SrcVTI.Suffix, U, op>;
+ defm : MVE_VCVT_fix_patterns<!cast<Instruction>(NAME), U, DestVTI, SrcVTI>;
+}
+
+defm MVE_VCVTf16s16_fix : MVE_VCVT_fix_f16_m<0b0, 0b0, MVE_v8f16, MVE_v8s16>;
+defm MVE_VCVTs16f16_fix : MVE_VCVT_fix_f16_m<0b0, 0b1, MVE_v8s16, MVE_v8f16>;
+defm MVE_VCVTf16u16_fix : MVE_VCVT_fix_f16_m<0b1, 0b0, MVE_v8f16, MVE_v8u16>;
+defm MVE_VCVTu16f16_fix : MVE_VCVT_fix_f16_m<0b1, 0b1, MVE_v8u16, MVE_v8f16>;
+defm MVE_VCVTf32s32_fix : MVE_VCVT_fix_f32_m<0b0, 0b0, MVE_v4f32, MVE_v4s32>;
+defm MVE_VCVTs32f32_fix : MVE_VCVT_fix_f32_m<0b0, 0b1, MVE_v4s32, MVE_v4f32>;
+defm MVE_VCVTf32u32_fix : MVE_VCVT_fix_f32_m<0b1, 0b0, MVE_v4f32, MVE_v4u32>;
+defm MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32_m<0b1, 0b1, MVE_v4u32, MVE_v4f32>;
class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
bits<2> rm, list<dag> pattern=[]>
@@ -3497,23 +3812,44 @@ class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
let validForTailPredication = 1;
}
-multiclass MVE_VCVT_fp_int_anpm_multi<string suffix, bits<2> size, bit op,
- list<dag> pattern=[]> {
- def a : MVE_VCVT_fp_int_anpm<suffix, size, op, "a", 0b00>;
- def n : MVE_VCVT_fp_int_anpm<suffix, size, op, "n", 0b01>;
- def p : MVE_VCVT_fp_int_anpm<suffix, size, op, "p", 0b10>;
- def m : MVE_VCVT_fp_int_anpm<suffix, size, op, "m", 0b11>;
+multiclass MVE_VCVT_fp_int_anpm_inner<MVEVectorVTInfo Int, MVEVectorVTInfo Flt,
+ string anpm, bits<2> rm> {
+ def "": MVE_VCVT_fp_int_anpm<Int.Suffix # "." # Flt.Suffix, Int.Size,
+ Int.Unsigned, anpm, rm>;
+
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar IntrBaseName = "int_arm_mve_vcvt" # anpm;
+ defvar UnpredIntr = !cast<Intrinsic>(IntrBaseName);
+ defvar PredIntr = !cast<Intrinsic>(IntrBaseName # "_predicated");
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(Int.Vec (UnpredIntr (i32 Int.Unsigned), (Flt.Vec MQPR:$in))),
+ (Int.Vec (Inst (Flt.Vec MQPR:$in)))>;
+
+ def : Pat<(Int.Vec (PredIntr (i32 Int.Unsigned), (Int.Vec MQPR:$inactive),
+ (Flt.Vec MQPR:$in), (Flt.Pred VCCR:$pred))),
+ (Int.Vec (Inst (Flt.Vec MQPR:$in), ARMVCCThen,
+ (Flt.Pred VCCR:$pred), (Int.Vec MQPR:$inactive)))>;
+ }
+}
+
+multiclass MVE_VCVT_fp_int_anpm_outer<MVEVectorVTInfo Int,
+ MVEVectorVTInfo Flt> {
+ defm a : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "a", 0b00>;
+ defm n : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "n", 0b01>;
+ defm p : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "p", 0b10>;
+ defm m : MVE_VCVT_fp_int_anpm_inner<Int, Flt, "m", 0b11>;
}
// This defines instructions such as MVE_VCVTu16f16a, with an explicit
// rounding-mode suffix on the mnemonic. The class below will define
// the bare MVE_VCVTu16f16 (with implied rounding toward zero).
-defm MVE_VCVTs16f16 : MVE_VCVT_fp_int_anpm_multi<"s16.f16", 0b01, 0b0>;
-defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_multi<"u16.f16", 0b01, 0b1>;
-defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_multi<"s32.f32", 0b10, 0b0>;
-defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_multi<"u32.f32", 0b10, 0b1>;
+defm MVE_VCVTs16f16 : MVE_VCVT_fp_int_anpm_outer<MVE_v8s16, MVE_v8f16>;
+defm MVE_VCVTu16f16 : MVE_VCVT_fp_int_anpm_outer<MVE_v8u16, MVE_v8f16>;
+defm MVE_VCVTs32f32 : MVE_VCVT_fp_int_anpm_outer<MVE_v4s32, MVE_v4f32>;
+defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_outer<MVE_v4u32, MVE_v4f32>;
-class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
+class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
list<dag> pattern=[]>
: MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
@@ -3527,41 +3863,43 @@ class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op,
let Inst{17-16} = 0b11;
let Inst{15-13} = Qd{2-0};
let Inst{12-9} = 0b0011;
- let Inst{8-7} = op;
+ let Inst{8} = toint;
+ let Inst{7} = unsigned;
let Inst{4} = 0b0;
let validForTailPredication = 1;
}
+multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
+ SDNode unpred_op> {
+ defvar Unsigned = !or(!eq(Dest.SuffixLetter,"u"), !eq(Src.SuffixLetter,"u"));
+ defvar ToInt = !eq(Src.SuffixLetter,"f");
+
+ def "" : MVE_VCVT_fp_int<Dest.Suffix # "." # Src.Suffix, Dest.Size,
+ ToInt, Unsigned>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ def : Pat<(Dest.Vec (unpred_op (Src.Vec MQPR:$src))),
+ (Dest.Vec (Inst (Src.Vec MQPR:$src)))>;
+ def : Pat<(Dest.Vec (int_arm_mve_vcvt_fp_int_predicated
+ (Src.Vec MQPR:$src), (i32 Unsigned),
+ (Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
+ (Dest.Vec (Inst (Src.Vec MQPR:$src), ARMVCCThen,
+ (Src.Pred VCCR:$mask),
+ (Dest.Vec MQPR:$inactive)))>;
+ }
+}
// The unsuffixed VCVT for float->int implicitly rounds toward zero,
// which I reflect here in the llvm instruction names
-def MVE_VCVTs16f16z : MVE_VCVT_fp_int<"s16.f16", 0b01, 0b10>;
-def MVE_VCVTu16f16z : MVE_VCVT_fp_int<"u16.f16", 0b01, 0b11>;
-def MVE_VCVTs32f32z : MVE_VCVT_fp_int<"s32.f32", 0b10, 0b10>;
-def MVE_VCVTu32f32z : MVE_VCVT_fp_int<"u32.f32", 0b10, 0b11>;
+defm MVE_VCVTs16f16z : MVE_VCVT_fp_int_m<MVE_v8s16, MVE_v8f16, fp_to_sint>;
+defm MVE_VCVTu16f16z : MVE_VCVT_fp_int_m<MVE_v8u16, MVE_v8f16, fp_to_uint>;
+defm MVE_VCVTs32f32z : MVE_VCVT_fp_int_m<MVE_v4s32, MVE_v4f32, fp_to_sint>;
+defm MVE_VCVTu32f32z : MVE_VCVT_fp_int_m<MVE_v4u32, MVE_v4f32, fp_to_uint>;
// Whereas VCVT for int->float rounds to nearest
-def MVE_VCVTf16s16n : MVE_VCVT_fp_int<"f16.s16", 0b01, 0b00>;
-def MVE_VCVTf16u16n : MVE_VCVT_fp_int<"f16.u16", 0b01, 0b01>;
-def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>;
-def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))),
- (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>;
- def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))),
- (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>;
- def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))),
- (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>;
- def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))),
- (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>;
- def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))),
- (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>;
- def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))),
- (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>;
- def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))),
- (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>;
- def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))),
- (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>;
-}
+defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8s16, sint_to_fp>;
+defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
+defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
+defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
list<dag> pattern=[]>
@@ -3582,26 +3920,29 @@ class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
let validForTailPredication = 1;
}
-def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
-def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>;
-
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fabs MQPR:$src)),
- (MVE_VABSf16 MQPR:$src)>;
- def : Pat<(v4f32 (fabs MQPR:$src)),
- (MVE_VABSf32 MQPR:$src)>;
-}
+multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
+ MVEVectorVTInfo VTI, bit opcode> {
+ def "" : MVE_VABSNEG_fp<iname, VTI.Suffix, VTI.Size, opcode>;
+ defvar Inst = !cast<Instruction>(NAME);
-def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>;
-def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
-let Predicates = [HasMVEFloat] in {
- def : Pat<(v8f16 (fneg MQPR:$src)),
- (MVE_VNEGf16 MQPR:$src)>;
- def : Pat<(v4f32 (fneg MQPR:$src)),
- (MVE_VNEGf32 MQPR:$src)>;
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive))),
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ }
}
+defm MVE_VABSf16 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+ MVE_v8f16, 0>;
+defm MVE_VABSf32 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated,
+ MVE_v4f32, 0>;
+defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+ MVE_v8f16, 1>;
+defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
+ MVE_v4f32, 1>;
+
class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
list<dag> pattern=[]>
: MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
@@ -3623,11 +3964,37 @@ class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
let Inst{0} = 0b1;
}
-def MVE_VMAXNMAf32 : MVE_VMAXMINNMA<"vmaxnma", "f32", 0b0, 0b0>;
-def MVE_VMAXNMAf16 : MVE_VMAXMINNMA<"vmaxnma", "f16", 0b1, 0b0>;
+multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
+ SDNode unpred_op, Intrinsic pred_int,
+ bit bit_12> {
+ def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size{0}, bit_12>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated v(max|min)nma
+ def : Pat<(VTI.Vec (unpred_op (fabs (VTI.Vec MQPR:$Qd)),
+ (fabs (VTI.Vec MQPR:$Qm)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm)))>;
+
+ // Predicated v(max|min)nma
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
+}
+
+multiclass MVE_VMAXNMA<MVEVectorVTInfo VTI, bit bit_12>
+ : MVE_VMAXMINNMA_m<"vmaxnma", VTI, fmaxnum, int_arm_mve_vmaxnma_predicated, bit_12>;
+
+defm MVE_VMAXNMAf32 : MVE_VMAXNMA<MVE_v4f32, 0b0>;
+defm MVE_VMAXNMAf16 : MVE_VMAXNMA<MVE_v8f16, 0b0>;
-def MVE_VMINNMAf32 : MVE_VMAXMINNMA<"vminnma", "f32", 0b0, 0b1>;
-def MVE_VMINNMAf16 : MVE_VMAXMINNMA<"vminnma", "f16", 0b1, 0b1>;
+multiclass MVE_VMINNMA<MVEVectorVTInfo VTI, bit bit_12>
+ : MVE_VMAXMINNMA_m<"vminnma", VTI, fminnum, int_arm_mve_vminnma_predicated, bit_12>;
+
+defm MVE_VMINNMAf32 : MVE_VMINNMA<MVE_v4f32, 0b1>;
+defm MVE_VMINNMAf16 : MVE_VMINNMA<MVE_v8f16, 0b1>;
// end of MVE Floating Point instructions
@@ -3796,12 +4163,12 @@ multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
- def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>;
- def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>;
+ def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup rGPR:$v2)), fc)),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc))>;
+ def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup rGPR:$v2)), fc)),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc))>;
+ def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup rGPR:$v2)), fc)),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
@@ -3810,12 +4177,12 @@ multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup rGPR:$v2)), fc)))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup rGPR:$v2)), fc)))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup rGPR:$v2)), fc)))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
}
multiclass unpred_vcmpf_z<PatLeaf fc> {
@@ -3825,31 +4192,31 @@ multiclass unpred_vcmpf_z<PatLeaf fc> {
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))),
- (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
}
multiclass unpred_vcmpf_r<int fc> {
- def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)),
- (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
- def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)),
- (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
+ def : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)),
+ (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
+ def : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)),
+ (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
- def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>;
- def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
+ def : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup rGPR:$v2)), fc)),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc))>;
+ def : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup rGPR:$v2)), fc)),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))),
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))),
(v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
- def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup rGPR:$v2)), fc)))),
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup rGPR:$v2)), fc)))),
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
}
let Predicates = [HasMVEInt] in {
@@ -3889,7 +4256,7 @@ let Predicates = [HasMVEFloat] in {
}
-// Extra "worst case" and/or/xor partterns, going into and out of GRP
+// Extra "worst case" and/or/xor patterns, going into and out of GPR
multiclass two_predops<SDPatternOperator opnode, Instruction insn> {
def v16i1 : Pat<(v16i1 (opnode (v16i1 VCCR:$p1), (v16i1 VCCR:$p2))),
(v16i1 (COPY_TO_REGCLASS
@@ -3918,7 +4285,6 @@ let Predicates = [HasMVEInt] in {
// example when moving between rGPR and VPR.P0 as part of predicate vector
// shuffles. We also sometimes need to cast between different predicate
// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles.
-
def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;
let Predicates = [HasMVEInt] in {
@@ -3932,6 +4298,16 @@ let Predicates = [HasMVEInt] in {
def : Pat<(VT (predicate_cast (VT2 VCCR:$src))),
(VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
}
+
+ // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
+ // rather than the more general 'ARMVectorRegCast' which would also
+ // match some bitconverts. If we use the latter in cases where the
+ // input and output types are the same, the bitconvert gets elided
+ // and we end up generating a nonsense match of nothing.
+
+ foreach VT = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
+ foreach VT2 = [ v16i8, v8i16, v8f16, v4i32, v4f32, v2i64, v2f64 ] in
+ def : Pat<(VT (ARMVectorRegCastImpl (VT2 MQPR:$src))), (VT MQPR:$src)>;
}
// end of MVE compares
@@ -3973,11 +4349,32 @@ class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
let Inst{0} = round;
}
+multiclass MVE_VQxDMLxDH_p<string iname, bit exch, bit round, bit subtract,
+ MVEVectorVTInfo VTI> {
+ def "": MVE_VQxDMLxDH<iname, exch, round, subtract, VTI.Suffix, VTI.Size,
+ !if(!eq(VTI.LaneBits, 32), ",@earlyclobber $Qd", "")>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar ConstParams = (? (i32 exch), (i32 round), (i32 subtract));
+ defvar unpred_intr = int_arm_mve_vqdmlad;
+ defvar pred_intr = int_arm_mve_vqdmlad_predicated;
+
+ def : Pat<(VTI.Vec !con((unpred_intr (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
+ (VTI.Vec MQPR:$c)), ConstParams)),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
+ (VTI.Vec MQPR:$c)))>;
+ def : Pat<(VTI.Vec !con((pred_intr (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
+ (VTI.Vec MQPR:$c)), ConstParams,
+ (? (VTI.Pred VCCR:$pred)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
+ (VTI.Vec MQPR:$c),
+ ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+}
+
multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
bit round, bit subtract> {
- def s8 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s8", 0b00>;
- def s16 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s16", 0b01>;
- def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10, ",@earlyclobber $Qd">;
+ defm s8 : MVE_VQxDMLxDH_p<iname, exch, round, subtract, MVE_v16s8>;
+ defm s16 : MVE_VQxDMLxDH_p<iname, exch, round, subtract, MVE_v8s16>;
+ defm s32 : MVE_VQxDMLxDH_p<iname, exch, round, subtract, MVE_v4s32>;
}
defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>;
@@ -4051,6 +4448,7 @@ class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
let Inst{7} = Qn{3};
let Inst{0} = 0b0;
let validForTailPredication = 1;
+ let doubleWidthResult = 1;
}
multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
@@ -4072,10 +4470,10 @@ multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
// Predicated multiply
def : Pat<(VTI.DblVec !con((pred_int (VTI.Vec MQPR:$Qm),
(VTI.Vec MQPR:$Qn)),
- uflag, (? (i32 Top), (VTI.Pred VCCR:$mask),
+ uflag, (? (i32 Top), (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))),
(VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))>;
}
}
@@ -4122,6 +4520,50 @@ defm MVE_VMULLBp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
defm MVE_VMULLTp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
int_arm_mve_mull_poly_predicated, 0b1>;
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v2i64 (ARMvmulls (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))),
+ (MVE_VMULLBs32 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(v2i64 (ARMvmulls (v4i32 (ARMvrev64 (v4i32 MQPR:$src1))),
+ (v4i32 (ARMvrev64 (v4i32 MQPR:$src2))))),
+ (MVE_VMULLTs32 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(mul (sext_inreg (v4i32 MQPR:$src1), v4i16),
+ (sext_inreg (v4i32 MQPR:$src2), v4i16)),
+ (MVE_VMULLBs16 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(mul (sext_inreg (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src1)))), v4i16),
+ (sext_inreg (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src2)))), v4i16)),
+ (MVE_VMULLTs16 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(mul (sext_inreg (v8i16 MQPR:$src1), v8i8),
+ (sext_inreg (v8i16 MQPR:$src2), v8i8)),
+ (MVE_VMULLBs8 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(mul (sext_inreg (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src1)))), v8i8),
+ (sext_inreg (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src2)))), v8i8)),
+ (MVE_VMULLTs8 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(v2i64 (ARMvmullu (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))),
+ (MVE_VMULLBu32 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(v2i64 (ARMvmullu (v4i32 (ARMvrev64 (v4i32 MQPR:$src1))),
+ (v4i32 (ARMvrev64 (v4i32 MQPR:$src2))))),
+ (MVE_VMULLTu32 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(mul (and (v4i32 MQPR:$src1), (v4i32 (ARMvmovImm (i32 0xCFF)))),
+ (and (v4i32 MQPR:$src2), (v4i32 (ARMvmovImm (i32 0xCFF))))),
+ (MVE_VMULLBu16 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(mul (and (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src1)))),
+ (v4i32 (ARMvmovImm (i32 0xCFF)))),
+ (and (v4i32 (ARMVectorRegCast (ARMvrev32 (v8i16 MQPR:$src2)))),
+ (v4i32 (ARMvmovImm (i32 0xCFF))))),
+ (MVE_VMULLTu16 MQPR:$src1, MQPR:$src2)>;
+
+ def : Pat<(mul (ARMvbicImm (v8i16 MQPR:$src1), (i32 0xAFF)),
+ (ARMvbicImm (v8i16 MQPR:$src2), (i32 0xAFF))),
+ (MVE_VMULLBu8 MQPR:$src1, MQPR:$src2)>;
+ def : Pat<(mul (ARMvbicImm (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src1)))), (i32 0xAFF)),
+ (ARMvbicImm (v8i16 (ARMVectorRegCast (ARMvrev16 (v16i8 MQPR:$src2)))), (i32 0xAFF))),
+ (MVE_VMULLTu8 MQPR:$src1, MQPR:$src2)>;
+}
+
class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size, bit round,
list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
@@ -4195,6 +4637,8 @@ class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
let Inst{8} = 0b0;
let Inst{7} = !if(!eq(bit_17, 0), 1, 0);
let Inst{0} = 0b1;
+ let validForTailPredication = 1;
+ let retainsPreviousHalfElement = 1;
}
multiclass MVE_VxMOVxN_halves<string iname, string suffix,
@@ -4213,21 +4657,121 @@ defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>;
defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>;
def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>;
+
+multiclass MVE_VMOVN_p<Instruction Inst, bit top,
+ MVEVectorVTInfo VTI, MVEVectorVTInfo InVTI> {
+ // Match the most obvious MVEvmovn(a,b,t), which overwrites the odd or even
+ // lanes of a (depending on t) with the even lanes of b.
+ def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qm), (i32 top))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+
+ if !eq(top, 0) then {
+ // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd
+ // lanes of a with the odd lanes of b. In other words, the lanes we're
+ // _keeping_ from a are the even ones. So we can flip it round and say that
+ // this is the same as overwriting the even lanes of b with the even lanes
+ // of a, i.e. it's a VMOVNB with the operands reversed.
+ defvar vrev = !cast<SDNode>("ARMvrev" # InVTI.LaneBits);
+ def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (vrev MQPR:$Qd_src)), (i32 1))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>;
+ }
+
+ // Match the IR intrinsic for a predicated VMOVN. This regards the Qm input
+ // as having wider lanes that we're narrowing, instead of already-narrow
+ // lanes that we're taking every other one of.
+ def : Pat<(VTI.Vec (int_arm_mve_vmovn_predicated (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm), (i32 top),
+ (InVTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+}
+
+defm : MVE_VMOVN_p<MVE_VMOVNi32bh, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi32th, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16bh, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VMOVN_p<MVE_VMOVNi16th, 1, MVE_v16i8, MVE_v8i16>;
+
+multiclass MVE_VQMOVN_p<Instruction Inst, bit outU, bit inU, bit top,
+ MVEVectorVTInfo VTI, MVEVectorVTInfo InVTI> {
+ def : Pat<(VTI.Vec (int_arm_mve_vqmovn (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ (i32 outU), (i32 inU), (i32 top))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(VTI.Vec (int_arm_mve_vqmovn_predicated (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ (i32 outU), (i32 inU), (i32 top),
+ (InVTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (InVTI.Vec MQPR:$Qm),
+ ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+}
+
+defm : MVE_VQMOVN_p<MVE_VQMOVNs32bh, 0, 0, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNs32th, 0, 0, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNs16bh, 0, 0, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNs16th, 0, 0, 1, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNu32bh, 1, 1, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNu32th, 1, 1, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNu16bh, 1, 1, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVNu16th, 1, 1, 1, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVUNs32bh, 1, 0, 0, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVUNs32th, 1, 0, 1, MVE_v8i16, MVE_v4i32>;
+defm : MVE_VQMOVN_p<MVE_VQMOVUNs16bh, 1, 0, 0, MVE_v16i8, MVE_v8i16>;
+defm : MVE_VQMOVN_p<MVE_VQMOVUNs16th, 1, 0, 1, MVE_v16i8, MVE_v8i16>;
+
+def SDTARMVMOVNQ : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVec<2>, SDTCisVT<3, i32>]>;
+def MVEvqmovns : SDNode<"ARMISD::VQMOVNs", SDTARMVMOVNQ>;
+def MVEvqmovnu : SDNode<"ARMISD::VQMOVNu", SDTARMVMOVNQ>;
+
let Predicates = [HasMVEInt] in {
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
- (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
- (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))),
- (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
- def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))),
- (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>;
+ def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 0))),
+ (v8i16 (MVE_VQMOVNs32bh (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+ def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 1))),
+ (v8i16 (MVE_VQMOVNs32th (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
+ (v16i8 (MVE_VQMOVNs16bh (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
+ (v16i8 (MVE_VQMOVNs16th (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+
+ def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 0))),
+ (v8i16 (MVE_VQMOVNu32bh (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+ def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 1))),
+ (v8i16 (MVE_VQMOVNu32th (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
+ (v16i8 (MVE_VQMOVNu16bh (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+ def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
+ (v16i8 (MVE_VQMOVNu16th (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+
+ def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshrsImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 0))),
+ (v8i16 (MVE_VQSHRNbhs32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
+ def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshrsImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 0))),
+ (v16i8 (MVE_VQSHRNbhs16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
+ def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshrsImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 1))),
+ (v8i16 (MVE_VQSHRNths32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
+ def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshrsImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 1))),
+ (v16i8 (MVE_VQSHRNths16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
+
+ def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshruImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 0))),
+ (v8i16 (MVE_VQSHRNbhu32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
+ def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshruImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 0))),
+ (v16i8 (MVE_VQSHRNbhu16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
+ def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshruImm (v4i32 MQPR:$Qm), imm0_31:$imm)), (i32 1))),
+ (v8i16 (MVE_VQSHRNthu32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), imm0_31:$imm))>;
+ def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshruImm (v8i16 MQPR:$Qm), imm0_15:$imm)), (i32 1))),
+ (v16i8 (MVE_VQSHRNthu16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), imm0_15:$imm))>;
}
class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
- list<dag> pattern=[]>
- : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
- "$Qd, $Qm", vpred_n, "$Qd = $Qd_src", pattern> {
+ dag iops_extra, vpred_ops vpred, string cstr>
+ : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
+ !con(iops_extra, (ins MQPR:$Qm)), "$Qd, $Qm",
+ vpred, cstr, []> {
let Inst{28} = op;
let Inst{21-16} = 0b111111;
let Inst{12} = T;
@@ -4235,10 +4779,17 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
let Inst{0} = 0b1;
let Predicates = [HasMVEFloat];
+ let retainsPreviousHalfElement = 1;
}
+// Type profile for VCVTL: one result and two operands. The result and the
+// first operand are constrained to be vectors; the second operand is an i32.
+def SDTARMVCVTL : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisVT<2, i32>]>;
+// Selection-DAG nodes bound to ARMISD::VCVTN and ARMISD::VCVTL. VCVTN reuses
+// the SDTARMVMOVNQ profile, which is declared outside this hunk.
+def MVEvcvtn : SDNode<"ARMISD::VCVTN", SDTARMVMOVNQ>;
+def MVEvcvtl : SDNode<"ARMISD::VCVTL", SDTARMVCVTL>;
+
multiclass MVE_VCVT_f2h_m<string iname, int half> {
- def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half>;
+ def "": MVE_VCVT_ff<iname, "f16.f32", 0b0, half,
+ (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -4250,11 +4801,28 @@ multiclass MVE_VCVT_f2h_m<string iname, int half> {
(v4i1 VCCR:$mask))),
(v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
ARMVCCThen, (v4i1 VCCR:$mask)))>;
+
+ def : Pat<(v8f16 (MVEvcvtn (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
+ (v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
}
}
+// Defines one "f32.f16" conversion instruction plus its selection patterns.
+// `half` is forwarded to MVE_VCVT_ff as the T bit and is also the i32
+// constant that every pattern below requires on the matched node.
multiclass MVE_VCVT_h2f_m<string iname, int half> {
- def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half>;
+ // No extra input operands and no operand constraint are passed here; the
+ // predication operand class is vpred_r.
+ def "": MVE_VCVT_ff<iname, "f32.f16", 0b1, half, (ins), vpred_r, "">;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEFloat] in {
+ // Unpredicated intrinsic form.
+ def : Pat<(v4f32 (int_arm_mve_vcvt_widen (v8f16 MQPR:$Qm), (i32 half))),
+ (v4f32 (Inst (v8f16 MQPR:$Qm)))>;
+ // Predicated intrinsic form: the intrinsic lists $inactive first, while the
+ // instruction takes ARMVCCThen, the mask and then $inactive after $Qm.
+ def : Pat<(v4f32 (int_arm_mve_vcvt_widen_predicated
+ (v4f32 MQPR:$inactive), (v8f16 MQPR:$Qm), (i32 half),
+ (v4i1 VCCR:$mask))),
+ (v4f32 (Inst (v8f16 MQPR:$Qm), ARMVCCThen,
+ (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive)))>;
+
+ // MVEvcvtl node form; selects the same unpredicated instruction.
+ def : Pat<(v4f32 (MVEvcvtl (v8f16 MQPR:$Qm), (i32 half))),
+ (v4f32 (Inst (v8f16 MQPR:$Qm)))>;
+ }
}
defm MVE_VCVTf16f32bh : MVE_VCVT_f2h_m<"vcvtb", 0b0>;
@@ -4353,15 +4921,37 @@ class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
let Inst{7} = Qn{3};
let Inst{0} = 0b1;
let validForTailPredication = 1;
+ let doubleWidthResult = 1;
+}
+
+// Defines one vector-by-vector VQDMULL instruction and the patterns that
+// select it from int_arm_mve_vqdmull and int_arm_mve_vqdmull_predicated.
+// `T` is passed to the instruction class and is also the i32 constant the
+// intrinsic must carry for the pattern to match. Inputs use VTI.Vec; the
+// result and the inactive value use VTI.DblVec, and the mask uses VTI.DblPred.
+multiclass MVE_VQDMULL_m<string iname, MVEVectorVTInfo VTI, bit size, bit T,
+ string cstr> {
+ def "" : MVE_VQDMULL<iname, VTI.Suffix, size, T, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated saturating multiply
+ def : Pat<(VTI.DblVec (int_arm_mve_vqdmull (VTI.Vec MQPR:$Qm),
+ (VTI.Vec MQPR:$Qn), (i32 T))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>;
+ // Predicated saturating multiply
+ def : Pat<(VTI.DblVec (int_arm_mve_vqdmull_predicated
+ (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ (i32 T), (VTI.DblPred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+ }
}
-multiclass MVE_VQDMULL_halves<string suffix, bit size, string cstr=""> {
- def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0, cstr>;
- def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1, cstr>;
+multiclass MVE_VQDMULL_halves<MVEVectorVTInfo VTI, bit size, string cstr=""> {
+ defm bh : MVE_VQDMULL_m<"vqdmullb", VTI, size, 0b0, cstr>;
+ defm th : MVE_VQDMULL_m<"vqdmullt", VTI, size, 0b1, cstr>;
}
-defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>;
-defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1, "@earlyclobber $Qd">;
+defm MVE_VQDMULLs16 : MVE_VQDMULL_halves<MVE_v8s16, 0b0>;
+defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
// end of mve_qDest_qSrc
@@ -4407,10 +4997,61 @@ class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
let Inst{3-0} = Rm{3-0};
}
+// Patterns for vector-scalar instructions with integer operands
+//
+// Emits an unpredicated and a predicated pattern that select `inst` when the
+// second vector operand of the matched node is an ARMvdup of an rGPR scalar.
+//   inst            - instruction to select
+//   VTI             - supplies the vector type, predicate type and Unsigned flag
+//   unpred_op       - node matched by the unpredicated pattern
+//   pred_op         - node matched by the predicated pattern
+//   unpred_has_sign - if set, unpred_op takes a trailing (i32 VTI.Unsigned)
+//   pred_has_sign   - if set, pred_op takes (i32 VTI.Unsigned) before the mask
+multiclass MVE_vec_scalar_int_pat_m<Instruction inst, MVEVectorVTInfo VTI,
+ SDNode unpred_op, SDNode pred_op,
+ bit unpred_has_sign = 0,
+ bit pred_has_sign = 0> {
+ // Optional operand lists: either a single i32 signedness operand or an
+ // empty dag, which adds nothing when concatenated with !con below.
+ defvar UnpredSign = !if(unpred_has_sign, (? (i32 VTI.Unsigned)), (?));
+ defvar PredSign = !if(pred_has_sign, (? (i32 VTI.Unsigned)), (?));
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated version
+ def : Pat<(VTI.Vec !con((unpred_op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$val))),
+ UnpredSign)),
+ (VTI.Vec (inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val)))>;
+ // Predicated version
+ // The source dag is assembled from three pieces: the two data operands,
+ // the optional sign operand, and the mask followed by the inactive value.
+ def : Pat<(VTI.Vec !con((pred_op (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$val))),
+ PredSign,
+ (pred_op (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))),
+ (VTI.Vec (inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+ }
+}
+
+// Patterns for vector-scalar instructions with FP operands
+//
+// Emits four patterns (unpredicated and predicated, for v8f16 and v4f32).
+// Each matches a node whose second operand is an ARMvdup of an rGPR scalar
+// and selects the instruction form that takes that scalar as an i32 register.
+//   unpred_op - node matched by the unpredicated patterns
+//   pred_int  - intrinsic matched by the predicated patterns; it takes the
+//               mask and then the inactive value after the two data operands
+//   instr_f16 - instruction selected for v8f16 (mask type v8i1)
+//   instr_f32 - instruction selected for v4f32 (mask type v4i1)
+multiclass MVE_vec_scalar_fp_pat_m<SDNode unpred_op, Intrinsic pred_int,
+ Instruction instr_f16,
+ Instruction instr_f32> {
+ let Predicates = [HasMVEFloat] in {
+ // Unpredicated F16
+ def : Pat<(v8f16 (unpred_op (v8f16 MQPR:$Qm), (v8f16 (ARMvdup rGPR:$val)))),
+ (v8f16 (instr_f16 (v8f16 MQPR:$Qm), (i32 rGPR:$val)))>;
+ // Unpredicated F32
+ def : Pat<(v4f32 (unpred_op (v4f32 MQPR:$Qm), (v4f32 (ARMvdup rGPR:$val)))),
+ (v4f32 (instr_f32 (v4f32 MQPR:$Qm), (i32 rGPR:$val)))>;
+ // Predicated F16
+ def : Pat<(v8f16 (pred_int (v8f16 MQPR:$Qm), (v8f16 (ARMvdup rGPR:$val)),
+ (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))),
+ (v8f16 (instr_f16 (v8f16 MQPR:$Qm), (i32 rGPR:$val),
+ ARMVCCThen, (v8i1 VCCR:$mask),
+ (v8f16 MQPR:$inactive)))>;
+ // Predicated F32
+ def : Pat<(v4f32 (pred_int (v4f32 MQPR:$Qm), (v4f32 (ARMvdup rGPR:$val)),
+ (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))),
+ (v4f32 (instr_f32 (v4f32 MQPR:$Qm), (i32 rGPR:$val),
+ ARMVCCThen, (v4i1 VCCR:$mask),
+ (v4f32 MQPR:$inactive)))>;
+ }
+}
+
class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
- bit bit_5, bit bit_12, bit bit_16,
- bit bit_28, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+ bit bit_5, bit bit_12, bit bit_16, bit bit_28>
+ : MVE_qDest_rSrc<iname, suffix, ""> {
let Inst{28} = bit_28;
let Inst{21-20} = size;
@@ -4421,42 +5062,60 @@ class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
let validForTailPredication = 1;
}
-multiclass MVE_VADDSUB_qr_sizes<string iname, string suffix,
- bit bit_5, bit bit_12, bit bit_16,
- bit bit_28, list<dag> pattern=[]> {
- def "8" : MVE_VADDSUB_qr<iname, suffix#"8", 0b00,
- bit_5, bit_12, bit_16, bit_28>;
- def "16" : MVE_VADDSUB_qr<iname, suffix#"16", 0b01,
- bit_5, bit_12, bit_16, bit_28>;
- def "32" : MVE_VADDSUB_qr<iname, suffix#"32", 0b10,
- bit_5, bit_12, bit_16, bit_28>;
-}
-
-defm MVE_VADD_qr_i : MVE_VADDSUB_qr_sizes<"vadd", "i", 0b0, 0b0, 0b1, 0b0>;
-defm MVE_VQADD_qr_s : MVE_VADDSUB_qr_sizes<"vqadd", "s", 0b1, 0b0, 0b0, 0b0>;
-defm MVE_VQADD_qr_u : MVE_VADDSUB_qr_sizes<"vqadd", "u", 0b1, 0b0, 0b0, 0b1>;
-
-defm MVE_VSUB_qr_i : MVE_VADDSUB_qr_sizes<"vsub", "i", 0b0, 0b1, 0b1, 0b0>;
-defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>;
-defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>;
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
- (v16i8 (MVE_VADD_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
- (v8i16 (MVE_VADD_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
- (v4i32 (MVE_VADD_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
-}
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
- (v16i8 (MVE_VSUB_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
- (v8i16 (MVE_VSUB_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
- (v4i32 (MVE_VSUB_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
-}
+// Vector-scalar add/sub
+//
+// Defines one non-saturating vector-scalar add or subtract instruction and
+// its selection patterns. The MVE_VADDSUB_qr arguments after the size are
+// bit_5 = 0, bit_12 = subtract, bit_16 = 1 and bit_28 = 0. The patterns come
+// from MVE_vec_scalar_int_pat_m with both sign flags left at their default
+// of 0, so neither unpred_op nor pred_int is matched with a sign operand.
+multiclass MVE_VADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
+ SDNode unpred_op, Intrinsic pred_int> {
+ def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b0, subtract, 0b1, 0b0>;
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
+ unpred_op, pred_int>;
+}
+
+// "vadd": matches `add` and int_arm_mve_add_predicated.
+multiclass MVE_VADD_qr_m<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_qr_m<"vadd", VTI, 0b0, add, int_arm_mve_add_predicated>;
+
+// "vsub": matches `sub` and int_arm_mve_sub_predicated.
+multiclass MVE_VSUB_qr_m<MVEVectorVTInfo VTI>
+ : MVE_VADDSUB_qr_m<"vsub", VTI, 0b1, sub, int_arm_mve_sub_predicated>;
+
+// One instantiation per integer element size.
+defm MVE_VADD_qr_i8 : MVE_VADD_qr_m<MVE_v16i8>;
+defm MVE_VADD_qr_i16 : MVE_VADD_qr_m<MVE_v8i16>;
+defm MVE_VADD_qr_i32 : MVE_VADD_qr_m<MVE_v4i32>;
+
+defm MVE_VSUB_qr_i8 : MVE_VSUB_qr_m<MVE_v16i8>;
+defm MVE_VSUB_qr_i16 : MVE_VSUB_qr_m<MVE_v8i16>;
+defm MVE_VSUB_qr_i32 : MVE_VSUB_qr_m<MVE_v4i32>;
+
+// Vector-scalar saturating add/sub
+//
+// Defines one saturating vector-scalar add or subtract instruction and its
+// selection patterns. The MVE_VADDSUB_qr arguments after the size are
+// bit_5 = 1, bit_12 = subtract, bit_16 = 0 and bit_28 = VTI.Unsigned.
+//   unpred_op_s / unpred_op_u - node used for the unpredicated pattern when
+//                               VTI.Unsigned is 0 / 1 respectively
+//   pred_int                  - predicated intrinsic; it is matched with an
+//                               (i32 VTI.Unsigned) operand (pred_has_sign = 1)
+//                               while the unpredicated node is not
+multiclass MVE_VQADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
+ SDNode unpred_op_s, SDNode unpred_op_u,
+ Intrinsic pred_int> {
+ def "" : MVE_VADDSUB_qr<iname, VTI.Suffix, VTI.Size, 0b1, subtract,
+ 0b0, VTI.Unsigned>;
+ defvar unpred_op = !if(VTI.Unsigned, unpred_op_u, unpred_op_s);
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
+ unpred_op, pred_int, 0, 1>;
+}
+
+// "vqadd": saddsat / uaddsat unpredicated, int_arm_mve_qadd_predicated.
+multiclass MVE_VQADD_qr_m<MVEVectorVTInfo VTI>
+ : MVE_VQADDSUB_qr_m<"vqadd", VTI, 0b0, saddsat, uaddsat,
+ int_arm_mve_qadd_predicated>;
+
+// "vqsub": ssubsat / usubsat unpredicated, int_arm_mve_qsub_predicated.
+multiclass MVE_VQSUB_qr_m<MVEVectorVTInfo VTI>
+ : MVE_VQADDSUB_qr_m<"vqsub", VTI, 0b1, ssubsat, usubsat,
+ int_arm_mve_qsub_predicated>;
+
+// One instantiation per signedness and element size.
+defm MVE_VQADD_qr_s8 : MVE_VQADD_qr_m<MVE_v16s8>;
+defm MVE_VQADD_qr_s16 : MVE_VQADD_qr_m<MVE_v8s16>;
+defm MVE_VQADD_qr_s32 : MVE_VQADD_qr_m<MVE_v4s32>;
+defm MVE_VQADD_qr_u8 : MVE_VQADD_qr_m<MVE_v16u8>;
+defm MVE_VQADD_qr_u16 : MVE_VQADD_qr_m<MVE_v8u16>;
+defm MVE_VQADD_qr_u32 : MVE_VQADD_qr_m<MVE_v4u32>;
+
+defm MVE_VQSUB_qr_s8 : MVE_VQSUB_qr_m<MVE_v16s8>;
+defm MVE_VQSUB_qr_s16 : MVE_VQSUB_qr_m<MVE_v8s16>;
+defm MVE_VQSUB_qr_s32 : MVE_VQSUB_qr_m<MVE_v4s32>;
+defm MVE_VQSUB_qr_u8 : MVE_VQSUB_qr_m<MVE_v16u8>;
+defm MVE_VQSUB_qr_u16 : MVE_VQSUB_qr_m<MVE_v8u16>;
+defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32>;
class MVE_VQDMULL_qr<string iname, string suffix, bit size,
bit T, string cstr="", list<dag> pattern=[]>
@@ -4469,15 +5128,40 @@ class MVE_VQDMULL_qr<string iname, string suffix, bit size,
let Inst{8} = 0b1;
let Inst{5} = 0b1;
let validForTailPredication = 1;
+ let doubleWidthResult = 1;
}
-multiclass MVE_VQDMULL_qr_halves<string suffix, bit size, string cstr=""> {
- def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0, cstr>;
- def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1, cstr>;
+// Defines one vector-by-scalar VQDMULL instruction and the patterns that
+// select it. The same two intrinsics are matched as for the vector-by-vector
+// form, but only when the second vector operand is an ARMvdup of an rGPR
+// scalar; the instruction then takes that scalar as an i32 register. `T` is
+// passed to the instruction class and is also the i32 constant required on
+// the intrinsic.
+multiclass MVE_VQDMULL_qr_m<string iname, MVEVectorVTInfo VTI, bit size,
+ bit T, string cstr> {
+ def "" : MVE_VQDMULL_qr<iname, VTI.Suffix, size, T, cstr>;
+ defvar Inst = !cast<Instruction>(NAME);
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated saturating multiply
+ def : Pat<(VTI.DblVec (int_arm_mve_vqdmull (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$val)),
+ (i32 T))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val)))>;
+ // Predicated saturating multiply
+ def : Pat<(VTI.DblVec (int_arm_mve_vqdmull_predicated
+ (VTI.Vec MQPR:$Qm),
+ (VTI.Vec (ARMvdup rGPR:$val)),
+ (i32 T),
+ (VTI.DblPred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive))),
+ (VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ (VTI.DblVec MQPR:$inactive)))>;
+ }
}
-defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>;
-defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1, "@earlyclobber $Qd">;
+multiclass MVE_VQDMULL_qr_halves<MVEVectorVTInfo VTI, bit size, string cstr=""> {
+ defm bh : MVE_VQDMULL_qr_m<"vqdmullb", VTI, size, 0b0, cstr>;
+ defm th : MVE_VQDMULL_qr_m<"vqdmullt", VTI, size, 0b1, cstr>;
+}
+
+defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<MVE_v8s16, 0b0>;
+defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
class MVE_VxADDSUB_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, bit subtract,
@@ -4493,19 +5177,34 @@ class MVE_VxADDSUB_qr<string iname, string suffix,
let validForTailPredication = 1;
}
-def MVE_VHADD_qr_s8 : MVE_VxADDSUB_qr<"vhadd", "s8", 0b0, 0b00, 0b0>;
-def MVE_VHADD_qr_s16 : MVE_VxADDSUB_qr<"vhadd", "s16", 0b0, 0b01, 0b0>;
-def MVE_VHADD_qr_s32 : MVE_VxADDSUB_qr<"vhadd", "s32", 0b0, 0b10, 0b0>;
-def MVE_VHADD_qr_u8 : MVE_VxADDSUB_qr<"vhadd", "u8", 0b1, 0b00, 0b0>;
-def MVE_VHADD_qr_u16 : MVE_VxADDSUB_qr<"vhadd", "u16", 0b1, 0b01, 0b0>;
-def MVE_VHADD_qr_u32 : MVE_VxADDSUB_qr<"vhadd", "u32", 0b1, 0b10, 0b0>;
+// Defines one vector-scalar VHADD/VHSUB instruction and its selection
+// patterns. VTI.Unsigned is passed as bit_28 and VTI.Size as bits_21_20 of
+// MVE_VxADDSUB_qr. Both sign flags of MVE_vec_scalar_int_pat_m are set, so
+// unpred_int and pred_int are each matched with an (i32 VTI.Unsigned) operand.
+multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
+ Intrinsic unpred_int, Intrinsic pred_int> {
+ def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract>;
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME),
+ VTI, unpred_int, pred_int, 1, 1>;
+}
+
+// "vhadd": int_arm_mve_vhadd and int_arm_mve_hadd_predicated.
+multiclass MVE_VHADD_qr_m<MVEVectorVTInfo VTI> :
+ MVE_VHADDSUB_qr_m<"vhadd", VTI, 0b0, int_arm_mve_vhadd,
+ int_arm_mve_hadd_predicated>;
+
+// "vhsub": int_arm_mve_vhsub and int_arm_mve_hsub_predicated.
+multiclass MVE_VHSUB_qr_m<MVEVectorVTInfo VTI> :
+ MVE_VHADDSUB_qr_m<"vhsub", VTI, 0b1, int_arm_mve_vhsub,
+ int_arm_mve_hsub_predicated>;
-def MVE_VHSUB_qr_s8 : MVE_VxADDSUB_qr<"vhsub", "s8", 0b0, 0b00, 0b1>;
-def MVE_VHSUB_qr_s16 : MVE_VxADDSUB_qr<"vhsub", "s16", 0b0, 0b01, 0b1>;
-def MVE_VHSUB_qr_s32 : MVE_VxADDSUB_qr<"vhsub", "s32", 0b0, 0b10, 0b1>;
-def MVE_VHSUB_qr_u8 : MVE_VxADDSUB_qr<"vhsub", "u8", 0b1, 0b00, 0b1>;
-def MVE_VHSUB_qr_u16 : MVE_VxADDSUB_qr<"vhsub", "u16", 0b1, 0b01, 0b1>;
-def MVE_VHSUB_qr_u32 : MVE_VxADDSUB_qr<"vhsub", "u32", 0b1, 0b10, 0b1>;
+defm MVE_VHADD_qr_s8 : MVE_VHADD_qr_m<MVE_v16s8>;
+defm MVE_VHADD_qr_s16 : MVE_VHADD_qr_m<MVE_v8s16>;
+defm MVE_VHADD_qr_s32 : MVE_VHADD_qr_m<MVE_v4s32>;
+defm MVE_VHADD_qr_u8 : MVE_VHADD_qr_m<MVE_v16u8>;
+defm MVE_VHADD_qr_u16 : MVE_VHADD_qr_m<MVE_v8u16>;
+defm MVE_VHADD_qr_u32 : MVE_VHADD_qr_m<MVE_v4u32>;
+
+defm MVE_VHSUB_qr_s8 : MVE_VHSUB_qr_m<MVE_v16s8>;
+defm MVE_VHSUB_qr_s16 : MVE_VHSUB_qr_m<MVE_v8s16>;
+defm MVE_VHSUB_qr_s32 : MVE_VHSUB_qr_m<MVE_v4s32>;
+defm MVE_VHSUB_qr_u8 : MVE_VHSUB_qr_m<MVE_v16u8>;
+defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>;
+defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
let Predicates = [HasMVEFloat] in {
def MVE_VADD_qr_f32 : MVE_VxADDSUB_qr<"vadd", "f32", 0b0, 0b11, 0b0>;
@@ -4515,6 +5214,11 @@ let Predicates = [HasMVEFloat] in {
def MVE_VSUB_qr_f16 : MVE_VxADDSUB_qr<"vsub", "f16", 0b1, 0b11, 0b1>;
}
+defm : MVE_vec_scalar_fp_pat_m<fadd, int_arm_mve_add_predicated,
+ MVE_VADD_qr_f16, MVE_VADD_qr_f32>;
+defm : MVE_vec_scalar_fp_pat_m<fsub, int_arm_mve_sub_predicated,
+ MVE_VSUB_qr_f16, MVE_VSUB_qr_f32>;
+
class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
bit bit_7, bit bit_17, list<dag> pattern=[]>
: MVE_qDest_single_rSrc<iname, suffix, pattern> {
@@ -4563,19 +5267,19 @@ defm MVE_VQSHL_qr : MVE_VxSHL_qr_types<"vqshl", 0b1, 0b0>;
defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;
let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
- (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
- def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
- (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
- def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
- (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup rGPR:$Rm)))),
+ (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), rGPR:$Rm))>;
+ def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup rGPR:$Rm)))),
+ (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), rGPR:$Rm))>;
+ def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup rGPR:$Rm)))),
+ (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), rGPR:$Rm))>;
- def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
- (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
- def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
- (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
- def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
- (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
+ def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup rGPR:$Rm)))),
+ (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), rGPR:$Rm))>;
+ def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup rGPR:$Rm)))),
+ (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), rGPR:$Rm))>;
+ def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup rGPR:$Rm)))),
+ (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), rGPR:$Rm))>;
}
class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
@@ -4594,6 +5298,20 @@ def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>;
def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>;
def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>;
+// Selection patterns mapping int_arm_mve_vbrsr and its predicated form onto
+// the given VBRSR instruction for vector type VTI.Vec. This multiclass sets
+// no Predicates itself; each use site wraps it in a `let Predicates` block.
+multiclass MVE_VBRSR_pat_m<MVEVectorVTInfo VTI, Instruction Inst> {
+ // Unpredicated
+ def : Pat<(VTI.Vec (int_arm_mve_vbrsr (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm)))>;
+ // Predicated
+ // The intrinsic lists $inactive first and the mask last; the instruction
+ // takes ARMVCCThen, the mask and then $inactive after its data operands.
+ def : Pat<(VTI.Vec (int_arm_mve_vbrsr_predicated
+ (VTI.Vec MQPR:$inactive),
+ (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
+ (VTI.Pred VCCR:$mask))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask),
+ (VTI.Vec MQPR:$inactive)))>;
+}
+
let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 ( bitreverse (v16i8 MQPR:$val1))),
(v16i8 ( MVE_VBRSR8 (v16i8 MQPR:$val1), (t2MOVi (i32 8)) ))>;
@@ -4603,11 +5321,19 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v8i16 ( bitreverse (v8i16 MQPR:$val1))),
(v8i16 ( MVE_VBRSR16 (v8i16 MQPR:$val1), (t2MOVi (i32 16)) ))>;
+
+ defm : MVE_VBRSR_pat_m<MVE_v16i8, MVE_VBRSR8>;
+ defm : MVE_VBRSR_pat_m<MVE_v8i16, MVE_VBRSR16>;
+ defm : MVE_VBRSR_pat_m<MVE_v4i32, MVE_VBRSR32>;
}
-class MVE_VMUL_qr_int<string iname, string suffix,
- bits<2> size, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+let Predicates = [HasMVEFloat] in {
+ defm : MVE_VBRSR_pat_m<MVE_v8f16, MVE_VBRSR16>;
+ defm : MVE_VBRSR_pat_m<MVE_v4f32, MVE_VBRSR32>;
+}
+
+class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size>
+ : MVE_qDest_rSrc<iname, suffix, ""> {
let Inst{28} = 0b0;
let Inst{21-20} = size;
@@ -4618,19 +5344,16 @@ class MVE_VMUL_qr_int<string iname, string suffix,
let validForTailPredication = 1;
}
-def MVE_VMUL_qr_i8 : MVE_VMUL_qr_int<"vmul", "i8", 0b00>;
-def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>;
-def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>;
-
-let Predicates = [HasMVEInt] in {
- def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))),
- (v16i8 (MVE_VMUL_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))),
- (v8i16 (MVE_VMUL_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>;
- def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))),
- (v4i32 (MVE_VMUL_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>;
+// Defines one integer vector-scalar "vmul" instruction for VTI and generates
+// its patterns from MVE_vec_scalar_int_pat_m: `mul` for the unpredicated form
+// and int_arm_mve_mul_predicated for the predicated form, with no sign
+// operand on either.
+multiclass MVE_VMUL_qr_int_m<MVEVectorVTInfo VTI> {
+ def "" : MVE_VMUL_qr_int<"vmul", VTI.Suffix, VTI.Size>;
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
+ mul, int_arm_mve_mul_predicated>;
}
+defm MVE_VMUL_qr_i8 : MVE_VMUL_qr_int_m<MVE_v16i8>;
+defm MVE_VMUL_qr_i16 : MVE_VMUL_qr_int_m<MVE_v8i16>;
+defm MVE_VMUL_qr_i32 : MVE_VMUL_qr_int_m<MVE_v4i32>;
+
class MVE_VxxMUL_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
: MVE_qDest_rSrc<iname, suffix, "", pattern> {
@@ -4643,19 +5366,37 @@ class MVE_VxxMUL_qr<string iname, string suffix,
let Inst{5} = 0b1;
}
-def MVE_VQDMULH_qr_s8 : MVE_VxxMUL_qr<"vqdmulh", "s8", 0b0, 0b00>;
-def MVE_VQDMULH_qr_s16 : MVE_VxxMUL_qr<"vqdmulh", "s16", 0b0, 0b01>;
-def MVE_VQDMULH_qr_s32 : MVE_VxxMUL_qr<"vqdmulh", "s32", 0b0, 0b10>;
+// Defines one vector-scalar instruction of class MVE_VxxMUL_qr and generates
+// its patterns from MVE_vec_scalar_int_pat_m. VTI.Size is passed as
+// bits_21_20. Neither intrinsic is matched with a sign operand, because the
+// sign flags are left at their default of 0.
+multiclass MVE_VxxMUL_qr_m<string iname, MVEVectorVTInfo VTI, bit bit_28,
+ Intrinsic int_unpred, Intrinsic int_pred> {
+ def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size>;
+ defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME), VTI,
+ int_unpred, int_pred>;
+}
+
+// "vqdmulh": bit_28 = 0.
+multiclass MVE_VQDMULH_qr_m<MVEVectorVTInfo VTI> :
+ MVE_VxxMUL_qr_m<"vqdmulh", VTI, 0b0,
+ int_arm_mve_vqdmulh, int_arm_mve_qdmulh_predicated>;
+
+// "vqrdmulh": bit_28 = 1.
+multiclass MVE_VQRDMULH_qr_m<MVEVectorVTInfo VTI> :
+ MVE_VxxMUL_qr_m<"vqrdmulh", VTI, 0b1,
+ int_arm_mve_vqrdmulh, int_arm_mve_qrdmulh_predicated>;
-def MVE_VQRDMULH_qr_s8 : MVE_VxxMUL_qr<"vqrdmulh", "s8", 0b1, 0b00>;
-def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>;
-def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>;
+defm MVE_VQDMULH_qr_s8 : MVE_VQDMULH_qr_m<MVE_v16s8>;
+defm MVE_VQDMULH_qr_s16 : MVE_VQDMULH_qr_m<MVE_v8s16>;
+defm MVE_VQDMULH_qr_s32 : MVE_VQDMULH_qr_m<MVE_v4s32>;
+
+defm MVE_VQRDMULH_qr_s8 : MVE_VQRDMULH_qr_m<MVE_v16s8>;
+defm MVE_VQRDMULH_qr_s16 : MVE_VQRDMULH_qr_m<MVE_v8s16>;
+defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
let Predicates = [HasMVEFloat], validForTailPredication = 1 in {
def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>;
def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>;
}
+defm : MVE_vec_scalar_fp_pat_m<fmul, int_arm_mve_mul_predicated,
+ MVE_VMUL_qr_f16, MVE_VMUL_qr_f32>;
+
class MVE_VFMAMLA_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, bit S,
list<dag> pattern=[]>
@@ -4668,42 +5409,87 @@ class MVE_VFMAMLA_qr<string iname, string suffix,
let Inst{8} = 0b0;
let Inst{5} = 0b0;
let validForTailPredication = 1;
+ let hasSideEffects = 0;
}
-def MVE_VMLA_qr_s8 : MVE_VFMAMLA_qr<"vmla", "s8", 0b0, 0b00, 0b0>;
-def MVE_VMLA_qr_s16 : MVE_VFMAMLA_qr<"vmla", "s16", 0b0, 0b01, 0b0>;
-def MVE_VMLA_qr_s32 : MVE_VFMAMLA_qr<"vmla", "s32", 0b0, 0b10, 0b0>;
-def MVE_VMLA_qr_u8 : MVE_VFMAMLA_qr<"vmla", "u8", 0b1, 0b00, 0b0>;
-def MVE_VMLA_qr_u16 : MVE_VFMAMLA_qr<"vmla", "u16", 0b1, 0b01, 0b0>;
-def MVE_VMLA_qr_u32 : MVE_VFMAMLA_qr<"vmla", "u32", 0b1, 0b10, 0b0>;
+// Defines one integer vector-scalar multiply-accumulate instruction and,
+// for unsigned VTI only, its selection patterns.
+//   iname         - mnemonic; also used to build the predicated intrinsic name
+//   VTI           - VTI.Unsigned is passed as bit_28 and VTI.Size as bits_21_20
+//   scalar_addend - passed as the S argument of MVE_VFMAMLA_qr; it also
+//                   selects which add/mul shape the unpredicated pattern matches
+multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
+ bit scalar_addend> {
+ def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
+ scalar_addend>;
+ defvar Inst = !cast<Instruction>(NAME);
+ // Looked up by name: int_arm_mve_<iname>_n_predicated.
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_n_predicated");
+ defvar v1 = (VTI.Vec MQPR:$v1);
+ defvar v2 = (VTI.Vec MQPR:$v2);
+ // `vs` is the scalar as matched in the source dag (a vector dup of $s);
+ // `s` is the same register as passed to the instruction.
+ defvar vs = (VTI.Vec (ARMvdup rGPR:$s));
+ defvar s = (i32 rGPR:$s);
+ defvar pred = (VTI.Pred VCCR:$pred);
+
+ // The signed and unsigned variants of this instruction have different
+ // encodings, but they're functionally identical. For the sake of
+ // determinism, we generate only the unsigned variant.
+ if VTI.Unsigned then let Predicates = [HasMVEInt] in {
+ if scalar_addend then {
+ // Scalar is the addend: v1 * v2 + dup(s).
+ def : Pat<(VTI.Vec (add (mul v1, v2), vs)),
+ (VTI.Vec (Inst v1, v2, s))>;
+ } else {
+ // Scalar is a multiplicand: v2 * dup(s) + v1.
+ def : Pat<(VTI.Vec (add (mul v2, vs), v1)),
+ (VTI.Vec (Inst v1, v2, s))>;
+ }
-def MVE_VMLAS_qr_s8 : MVE_VFMAMLA_qr<"vmlas", "s8", 0b0, 0b00, 0b1>;
-def MVE_VMLAS_qr_s16 : MVE_VFMAMLA_qr<"vmlas", "s16", 0b0, 0b01, 0b1>;
-def MVE_VMLAS_qr_s32 : MVE_VFMAMLA_qr<"vmlas", "s32", 0b0, 0b10, 0b1>;
-def MVE_VMLAS_qr_u8 : MVE_VFMAMLA_qr<"vmlas", "u8", 0b1, 0b00, 0b1>;
-def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>;
-def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>;
+ // Predicated form: the intrinsic already takes the scalar as an i32.
+ def : Pat<(VTI.Vec (pred_int v1, v2, s, pred)),
+ (VTI.Vec (Inst v1, v2, s, ARMVCCThen, pred))>;
+ }
+}
-let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (add (v4i32 MQPR:$src1),
- (v4i32 (mul (v4i32 MQPR:$src2),
- (v4i32 (ARMvdup (i32 rGPR:$x))))))),
- (v4i32 (MVE_VMLA_qr_u32 $src1, $src2, $x))>;
- def : Pat<(v8i16 (add (v8i16 MQPR:$src1),
- (v8i16 (mul (v8i16 MQPR:$src2),
- (v8i16 (ARMvdup (i32 rGPR:$x))))))),
- (v8i16 (MVE_VMLA_qr_u16 $src1, $src2, $x))>;
- def : Pat<(v16i8 (add (v16i8 MQPR:$src1),
- (v16i8 (mul (v16i8 MQPR:$src2),
- (v16i8 (ARMvdup (i32 rGPR:$x))))))),
- (v16i8 (MVE_VMLA_qr_u8 $src1, $src2, $x))>;
+defm MVE_VMLA_qr_s8 : MVE_VMLA_qr_multi<"vmla", MVE_v16s8, 0b0>;
+defm MVE_VMLA_qr_s16 : MVE_VMLA_qr_multi<"vmla", MVE_v8s16, 0b0>;
+defm MVE_VMLA_qr_s32 : MVE_VMLA_qr_multi<"vmla", MVE_v4s32, 0b0>;
+defm MVE_VMLA_qr_u8 : MVE_VMLA_qr_multi<"vmla", MVE_v16u8, 0b0>;
+defm MVE_VMLA_qr_u16 : MVE_VMLA_qr_multi<"vmla", MVE_v8u16, 0b0>;
+defm MVE_VMLA_qr_u32 : MVE_VMLA_qr_multi<"vmla", MVE_v4u32, 0b0>;
+
+defm MVE_VMLAS_qr_s8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16s8, 0b1>;
+defm MVE_VMLAS_qr_s16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8s16, 0b1>;
+defm MVE_VMLAS_qr_s32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4s32, 0b1>;
+defm MVE_VMLAS_qr_u8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16u8, 0b1>;
+defm MVE_VMLAS_qr_u16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8u16, 0b1>;
+defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>;
+
+// Defines one floating-point vector-scalar fused multiply-add instruction
+// and its selection patterns.
+//   iname         - mnemonic
+//   VTI           - bit 0 of VTI.Size is passed as bit_28; bits_21_20 is 0b11
+//   scalar_addend - passed as the S argument of MVE_VFMAMLA_qr; it also
+//                   selects which fma operand must be the dup'd scalar
+multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
+ bit scalar_addend> {
+ def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, scalar_addend>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar pred_int = int_arm_mve_fma_predicated;
+ defvar v1 = (VTI.Vec MQPR:$v1);
+ defvar v2 = (VTI.Vec MQPR:$v2);
+ // `vs` is the scalar as matched in the source dag (a vector dup of an i32
+ // register); `is` is the same register as passed to the instruction.
+ defvar vs = (VTI.Vec (ARMvdup (i32 rGPR:$s)));
+ defvar is = (i32 rGPR:$s);
+ defvar pred = (VTI.Pred VCCR:$pred);
+
+ let Predicates = [HasMVEFloat] in {
+ if scalar_addend then {
+ // Scalar is the third (addend) operand of fma.
+ def : Pat<(VTI.Vec (fma v1, v2, vs)),
+ (VTI.Vec (Inst v1, v2, is))>;
+ def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)),
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>;
+ } else {
+ // Scalar is one of the two multiplicands. Both operand orders are
+ // matched, and the addend v2 becomes the instruction's first operand.
+ def : Pat<(VTI.Vec (fma v1, vs, v2)),
+ (VTI.Vec (Inst v2, v1, is))>;
+ def : Pat<(VTI.Vec (fma vs, v1, v2)),
+ (VTI.Vec (Inst v2, v1, is))>;
+ def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)),
+ (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
+ def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)),
+ (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
+ }
+ }
}
let Predicates = [HasMVEFloat] in {
- def MVE_VFMA_qr_f16 : MVE_VFMAMLA_qr<"vfma", "f16", 0b1, 0b11, 0b0>;
- def MVE_VFMA_qr_f32 : MVE_VFMAMLA_qr<"vfma", "f32", 0b0, 0b11, 0b0>;
- def MVE_VFMA_qr_Sf16 : MVE_VFMAMLA_qr<"vfmas", "f16", 0b1, 0b11, 0b1>;
- def MVE_VFMA_qr_Sf32 : MVE_VFMAMLA_qr<"vfmas", "f32", 0b0, 0b11, 0b1>;
+ defm MVE_VFMA_qr_f16 : MVE_VFMA_qr_multi<"vfma", MVE_v8f16, 0>;
+ defm MVE_VFMA_qr_f32 : MVE_VFMA_qr_multi<"vfma", MVE_v4f32, 0>;
+ defm MVE_VFMA_qr_Sf16 : MVE_VFMA_qr_multi<"vfmas", MVE_v8f16, 1>;
+ defm MVE_VFMA_qr_Sf32 : MVE_VFMA_qr_multi<"vfmas", MVE_v4f32, 1>;
}
class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
@@ -4718,10 +5504,30 @@ class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
let Inst{5} = bit_5;
}
+// Defines one instruction of class MVE_VQDMLAH_qr (its U argument is always
+// 0b0 here) and the patterns that select it. Both intrinsics are looked up by
+// name from `iname`: int_arm_mve_<iname> and int_arm_mve_<iname>_predicated.
+// They take the scalar directly as an i32, so no ARMvdup is matched.
+multiclass MVE_VQDMLAH_qr_multi<string iname, MVEVectorVTInfo VTI,
+ bit bit_5, bit bit_12> {
+ def "": MVE_VQDMLAH_qr<iname, VTI.Suffix, 0b0, VTI.Size, bit_5, bit_12>;
+ defvar Inst = !cast<Instruction>(NAME);
+ defvar unpred_int = !cast<Intrinsic>("int_arm_mve_" # iname);
+ defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_predicated");
+
+ let Predicates = [HasMVEInt] in {
+ // Unpredicated
+ def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
+ (i32 rGPR:$s))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
+ (i32 rGPR:$s)))>;
+ // Predicated; no separate inactive-value operand is passed.
+ def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
+ (i32 rGPR:$s), (VTI.Pred VCCR:$pred))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
+ (i32 rGPR:$s), ARMVCCThen,
+ (VTI.Pred VCCR:$pred)))>;
+ }
+}
+
multiclass MVE_VQDMLAH_qr_types<string iname, bit bit_5, bit bit_12> {
- def s8 : MVE_VQDMLAH_qr<iname, "s8", 0b0, 0b00, bit_5, bit_12>;
- def s16 : MVE_VQDMLAH_qr<iname, "s16", 0b0, 0b01, bit_5, bit_12>;
- def s32 : MVE_VQDMLAH_qr<iname, "s32", 0b0, 0b10, bit_5, bit_12>;
+ defm s8 : MVE_VQDMLAH_qr_multi<iname, MVE_v16s8, bit_5, bit_12>;
+ defm s16 : MVE_VQDMLAH_qr_multi<iname, MVE_v8s16, bit_5, bit_12>;
+ defm s32 : MVE_VQDMLAH_qr_multi<iname, MVE_v4s32, bit_5, bit_12>;
}
defm MVE_VQDMLAH_qr : MVE_VQDMLAH_qr_types<"vqdmlah", 0b1, 0b0>;
@@ -4752,6 +5558,7 @@ class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
let Inst{6-1} = 0b110111;
let Inst{0} = imm{0};
let validForTailPredication = 1;
+ let hasSideEffects = 0;
}
def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>;
@@ -4787,6 +5594,7 @@ class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
let Inst{3-1} = Rm{3-1};
let Inst{0} = imm{0};
let validForTailPredication = 1;
+ let hasSideEffects = 0;
}
def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>;
@@ -4855,6 +5663,8 @@ class MVE_VMOV_64bit<dag oops, dag iops, bit to_qreg, string ops, string cstr>
let Inst{12-5} = 0b01111000;
let Inst{4} = idx2;
let Inst{3-0} = Rt{3-0};
+
+ let hasSideEffects = 0;
}
// The assembly syntax for these instructions mentions the vector
@@ -4924,6 +5734,7 @@ class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
let mayLoad = load;
let mayStore = !eq(load,0);
+ let hasSideEffects = 0;
}
// A parameter class used to encapsulate all the ways the writeback
@@ -5004,22 +5815,44 @@ foreach wb = [MVE_vldst24_writeback<
"vst" # n.nvecs # stage # "." # s.lanesize>;
}
+// Type profiles for the updating two- and four-vector store nodes. Each has
+// one pointer-typed result and takes a pointer operand, an i32, then two
+// (VST2) or four (VST4) vectors of the same type, and a trailing i32.
+def SDTARMVST2 : SDTypeProfile<1, 5, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVec<3>,
+ SDTCisSameAs<3, 4>, SDTCisVT<5, i32>]>;
+def SDTARMVST4 : SDTypeProfile<1, 7, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVec<3>,
+ SDTCisSameAs<3, 4>, SDTCisSameAs<3, 5>,
+ SDTCisSameAs<3, 6>, SDTCisVT<7, i32>]>;
+// Selection-DAG nodes bound to ARMISD::VST2_UPD and ARMISD::VST4_UPD; both
+// carry a chain (SDNPHasChain).
+def MVEVST2UPD : SDNode<"ARMISD::VST2_UPD", SDTARMVST2, [SDNPHasChain]>;
+def MVEVST4UPD : SDNode<"ARMISD::VST4_UPD", SDTARMVST4, [SDNPHasChain]>;
+
+// Selection patterns for the two- and four-vector stores of vector type VT.
+// For every stage, the plain intrinsic selects MVE_VST2<stage>_<lanesize> or
+// MVE_VST4<stage>_<lanesize>, and the MVEVST2UPD / MVEVST4UPD node selects the
+// instruction of the same name with a "_wb" suffix, producing an i32 result.
+// The source vectors are bundled into a QQPR or QQQQPR register tuple with
+// REG_SEQUENCE.
+// NOTE(review): the (i32 32) and (i32 64) operands required on the UPD nodes
+// are presumably the address increment in bytes - confirm against the
+// lowering code.
multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
foreach stage = [0,1] in
def : Pat<(int_arm_mve_vst2q i32:$addr,
- (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
+ (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
(!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize)
- (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
- t2_addr_offset_none:$addr)>;
+ (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ t2_addr_offset_none:$addr)>;
+ foreach stage = [0,1] in
+ def : Pat<(i32 (MVEVST2UPD i32:$addr, (i32 32),
+ (VT MQPR:$v0), (VT MQPR:$v1), (i32 stage))),
+ (i32 (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize#_wb)
+ (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ t2_addr_offset_none:$addr))>;
foreach stage = [0,1,2,3] in
def : Pat<(int_arm_mve_vst4q i32:$addr,
- (VT MQPR:$v0), (VT MQPR:$v1),
- (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
+ (VT MQPR:$v0), (VT MQPR:$v1),
+ (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
(!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize)
- (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
- VT:$v2, qsub_2, VT:$v3, qsub_3),
- t2_addr_offset_none:$addr)>;
+ (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
+ t2_addr_offset_none:$addr)>;
+ foreach stage = [0,1,2,3] in
+ def : Pat<(i32 (MVEVST4UPD i32:$addr, (i32 64),
+ (VT MQPR:$v0), (VT MQPR:$v1),
+ (VT MQPR:$v2), (VT MQPR:$v3), (i32 stage))),
+ (i32 (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize#_wb)
+ (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
+ t2_addr_offset_none:$addr))>;
}
defm : MVE_vst24_patterns<8, v16i8>;
defm : MVE_vst24_patterns<16, v8i16>;
@@ -5097,6 +5930,7 @@ class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
let mayLoad = dir.load;
let mayStore = !eq(dir.load,0);
+ let hasSideEffects = 0;
let validForTailPredication = 1;
}