diff options
Diffstat (limited to 'lib/Target/ARM/ARMInstrMVE.td')
-rw-r--r-- | lib/Target/ARM/ARMInstrMVE.td | 1430 |
1 files changed, 1138 insertions, 292 deletions
diff --git a/lib/Target/ARM/ARMInstrMVE.td b/lib/Target/ARM/ARMInstrMVE.td index 3e7ae55c7fc8..4f67cd6e47cc 100644 --- a/lib/Target/ARM/ARMInstrMVE.td +++ b/lib/Target/ARM/ARMInstrMVE.td @@ -160,7 +160,8 @@ class TMemImm7ShiftOffsetAsmOperand<int shift> : AsmOperandClass { let RenderMethod = "addMemImmOffsetOperands"; } -class taddrmode_imm7<int shift> : MemOperand { +class taddrmode_imm7<int shift> : MemOperand, + ComplexPattern<i32, 2, "SelectTAddrModeImm7<"#shift#">", []> { let ParserMatchClass = TMemImm7ShiftOffsetAsmOperand<shift>; // They are printed the same way as the T2 imm8 version let PrintMethod = "printT2AddrModeImm8Operand<false>"; @@ -221,7 +222,9 @@ def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>; def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>; def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>; -class t2am_imm7_offset<int shift> : MemOperand { +class t2am_imm7_offset<int shift> : MemOperand, + ComplexPattern<i32, 1, "SelectT2AddrModeImm7Offset<"#shift#">", + [], [SDNPWantRoot]> { // They are printed the same way as the imm8 version let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let ParserMatchClass = @@ -371,6 +374,8 @@ class MVE_ScalarShiftSRegReg<string iname, bits<2> op5_4, list<dag> pattern=[]> let Inst{7-6} = 0b00; let Inst{5-4} = op5_4{1-0}; let Inst{3-0} = 0b1101; + + let Unpredictable{8-6} = 0b111; } def MVE_SQRSHR : MVE_ScalarShiftSRegReg<"sqrshr", 0b10>; @@ -403,18 +408,17 @@ class MVE_ScalarShiftDRegImm<string iname, bits<2> op5_4, bit op16, let Inst{3-0} = 0b1111; } -class MVE_ScalarShiftDRegReg<string iname, bit op5, bit op16, - list<dag> pattern=[]> +class MVE_ScalarShiftDRegRegBase<string iname, dag iops, string asm, + bit op5, bit op16, list<dag> pattern=[]> : MVE_ScalarShiftDoubleReg< - iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm), - "$RdaLo, $RdaHi, $Rm", "@earlyclobber $RdaHi,@earlyclobber $RdaLo," - "$RdaLo = $RdaLo_src,$RdaHi = 
$RdaHi_src", + iname, iops, asm, "@earlyclobber $RdaHi,@earlyclobber $RdaLo," + "$RdaLo = $RdaLo_src,$RdaHi = $RdaHi_src", pattern> { bits<4> Rm; let Inst{16} = op16; let Inst{15-12} = Rm{3-0}; - let Inst{7-6} = 0b00; + let Inst{6} = 0b0; let Inst{5} = op5; let Inst{4} = 0b0; let Inst{3-0} = 0b1101; @@ -427,27 +431,44 @@ class MVE_ScalarShiftDRegReg<string iname, bit op5, bit op16, let DecoderMethod = "DecodeMVEOverlappingLongShift"; } -def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, +class MVE_ScalarShiftDRegReg<string iname, bit op5, list<dag> pattern=[]> + : MVE_ScalarShiftDRegRegBase< + iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm), + "$RdaLo, $RdaHi, $Rm", op5, 0b0, pattern> { + + let Inst{7} = 0b0; +} + +class MVE_ScalarShiftDRegRegWithSat<string iname, bit op5, list<dag> pattern=[]> + : MVE_ScalarShiftDRegRegBase< + iname, (ins tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm, saturateop:$sat), + "$RdaLo, $RdaHi, $sat, $Rm", op5, 0b1, pattern> { + bit sat; + + let Inst{7} = sat; +} + +def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMasrl tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm))]>; def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMasrl tGPREven:$RdaLo_src, - tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; -def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, + tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>; +def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMlsll tGPREven:$RdaLo_src, tGPROdd:$RdaHi_src, rGPR:$Rm))]>; def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi, (ARMlsll tGPREven:$RdaLo_src, - tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; + tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>; def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, 
tGPROdd:$RdaHi, (ARMlsrl tGPREven:$RdaLo_src, - tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>; + tGPROdd:$RdaHi_src, (i32 long_shift:$imm)))]>; -def MVE_SQRSHRL : MVE_ScalarShiftDRegReg<"sqrshrl", 0b1, 0b1>; +def MVE_SQRSHRL : MVE_ScalarShiftDRegRegWithSat<"sqrshrl", 0b1>; def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>; def MVE_SRSHRL : MVE_ScalarShiftDRegImm<"srshrl", 0b10, 0b1>; -def MVE_UQRSHLL : MVE_ScalarShiftDRegReg<"uqrshll", 0b0, 0b1>; +def MVE_UQRSHLL : MVE_ScalarShiftDRegRegWithSat<"uqrshll", 0b0>; def MVE_UQSHLL : MVE_ScalarShiftDRegImm<"uqshll", 0b00, 0b1>; def MVE_URSHRL : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>; @@ -531,6 +552,19 @@ defm MVE_VADDVu8 : MVE_VADDV_A<"u8", 0b1, 0b00>; defm MVE_VADDVu16 : MVE_VADDV_A<"u16", 0b1, 0b01>; defm MVE_VADDVu32 : MVE_VADDV_A<"u32", 0b1, 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(i32 (vecreduce_add (v4i32 MQPR:$src))), (i32 (MVE_VADDVu32no_acc $src))>; + def : Pat<(i32 (vecreduce_add (v8i16 MQPR:$src))), (i32 (MVE_VADDVu16no_acc $src))>; + def : Pat<(i32 (vecreduce_add (v16i8 MQPR:$src))), (i32 (MVE_VADDVu8no_acc $src))>; + def : Pat<(i32 (add (i32 (vecreduce_add (v4i32 MQPR:$src1))), (i32 tGPR:$src2))), + (i32 (MVE_VADDVu32acc $src2, $src1))>; + def : Pat<(i32 (add (i32 (vecreduce_add (v8i16 MQPR:$src1))), (i32 tGPR:$src2))), + (i32 (MVE_VADDVu16acc $src2, $src1))>; + def : Pat<(i32 (add (i32 (vecreduce_add (v16i8 MQPR:$src1))), (i32 tGPR:$src2))), + (i32 (MVE_VADDVu8acc $src2, $src1))>; + +} + class MVE_VADDLV<string iname, string suffix, dag iops, string cstr, bit A, bit U, list<dag> pattern=[]> : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname, @@ -636,6 +670,35 @@ multiclass MVE_VMINMAXV_ty<string iname, bit bit_7, list<dag> pattern=[]> { defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 0b1>; defm MVE_VMAXV : MVE_VMINMAXV_ty<"vmaxv", 0b0>; +let Predicates = [HasMVEInt] in { + def : Pat<(i32 (vecreduce_smax (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVs8 (t2MVNi (i32 127)), 
$src))>; + def : Pat<(i32 (vecreduce_smax (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVs16 (t2MOVi32imm (i32 -32768)), $src))>; + def : Pat<(i32 (vecreduce_smax (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVs32 (t2MOVi (i32 -2147483648)), $src))>; + def : Pat<(i32 (vecreduce_umax (v16i8 MQPR:$src))), + (i32 (MVE_VMAXVu8 (t2MOVi (i32 0)), $src))>; + def : Pat<(i32 (vecreduce_umax (v8i16 MQPR:$src))), + (i32 (MVE_VMAXVu16 (t2MOVi (i32 0)), $src))>; + def : Pat<(i32 (vecreduce_umax (v4i32 MQPR:$src))), + (i32 (MVE_VMAXVu32 (t2MOVi (i32 0)), $src))>; + + def : Pat<(i32 (vecreduce_smin (v16i8 MQPR:$src))), + (i32 (MVE_VMINVs8 (t2MOVi (i32 127)), $src))>; + def : Pat<(i32 (vecreduce_smin (v8i16 MQPR:$src))), + (i32 (MVE_VMINVs16 (t2MOVi16 (i32 32767)), $src))>; + def : Pat<(i32 (vecreduce_smin (v4i32 MQPR:$src))), + (i32 (MVE_VMINVs32 (t2MVNi (i32 -2147483648)), $src))>; + def : Pat<(i32 (vecreduce_umin (v16i8 MQPR:$src))), + (i32 (MVE_VMINVu8 (t2MOVi (i32 255)), $src))>; + def : Pat<(i32 (vecreduce_umin (v8i16 MQPR:$src))), + (i32 (MVE_VMINVu16 (t2MOVi16 (i32 65535)), $src))>; + def : Pat<(i32 (vecreduce_umin (v4i32 MQPR:$src))), + (i32 (MVE_VMINVu32 (t2MOVi (i32 4294967295)), $src))>; + +} + multiclass MVE_VMINMAXAV_ty<string iname, bit bit_7, list<dag> pattern=[]> { def s8 : MVE_VMINMAXV<iname, "s8", 0b0, 0b00, 0b0, bit_7>; def s16 : MVE_VMINMAXV<iname, "s16", 0b0, 0b01, 0b0, bit_7>; @@ -667,57 +730,57 @@ class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr, let Inst{0} = bit_0; } -multiclass MVE_VMLAMLSDAV_X<string iname, string suffix, dag iops, string cstr, - bit sz, bit bit_28, bit A, bit bit_8, bit bit_0, - list<dag> pattern=[]> { - def _noexch : MVE_VMLAMLSDAV<iname, suffix, iops, cstr, sz, - bit_28, A, 0b0, bit_8, bit_0, pattern>; - def _exch : MVE_VMLAMLSDAV<iname # "x", suffix, iops, cstr, sz, - bit_28, A, 0b1, bit_8, bit_0, pattern>; +multiclass MVE_VMLAMLSDAV_A<string iname, string x, string suffix, + bit sz, bit bit_28, bit X, bit bit_8, bit bit_0, + 
list<dag> pattern=[]> { + def ""#x#suffix : MVE_VMLAMLSDAV<iname # x, suffix, + (ins MQPR:$Qn, MQPR:$Qm), "", + sz, bit_28, 0b0, X, bit_8, bit_0, pattern>; + def "a"#x#suffix : MVE_VMLAMLSDAV<iname # "a" # x, suffix, + (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm), + "$RdaDest = $RdaSrc", + sz, bit_28, 0b1, X, bit_8, bit_0, pattern>; +} + +multiclass MVE_VMLAMLSDAV_AX<string iname, string suffix, bit sz, bit bit_28, + bit bit_8, bit bit_0, list<dag> pattern=[]> { + defm "" : MVE_VMLAMLSDAV_A<iname, "", suffix, sz, bit_28, + 0b0, bit_8, bit_0, pattern>; + defm "" : MVE_VMLAMLSDAV_A<iname, "x", suffix, sz, bit_28, + 0b1, bit_8, bit_0, pattern>; } -multiclass MVE_VMLAMLSDAV_XA<string iname, string suffix, bit sz, bit bit_28, - bit bit_8, bit bit_0, list<dag> pattern=[]> { - defm _noacc : MVE_VMLAMLSDAV_X<iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", - sz, bit_28, 0b0, bit_8, bit_0, pattern>; - defm _acc : MVE_VMLAMLSDAV_X<iname # "a", suffix, - (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm), - "$RdaDest = $RdaSrc", - sz, bit_28, 0b1, bit_8, bit_0, pattern>; +multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit bit_8, + list<dag> pattern=[]> { + defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix, + sz, 0b0, bit_8, 0b0, pattern>; + defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix, + sz, 0b1, 0b0, bit_8, 0b0, pattern>; } -multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit U, bit bit_8, - list<dag> pattern=[]> { - defm "" : MVE_VMLAMLSDAV_XA<"vmladav", suffix, sz, U, bit_8, 0b0, pattern>; +multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28, + list<dag> pattern=[]> { + defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix, + sz, bit_28, 0b0, 0b1, pattern>; } -defm MVE_VMLADAVs16 : MVE_VMLADAV_multi<"s16", 0b0, 0b0, 0b0>; -defm MVE_VMLADAVs32 : MVE_VMLADAV_multi<"s32", 0b1, 0b0, 0b0>; -defm MVE_VMLADAVu16 : MVE_VMLADAV_multi<"u16", 0b0, 0b1, 0b0>; -defm MVE_VMLADAVu32 : MVE_VMLADAV_multi<"u32", 0b1, 0b1, 0b0>; +defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 
0b1>; +defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>; +defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>; -defm MVE_VMLADAVs8 : MVE_VMLADAV_multi<"s8", 0b0, 0b0, 0b1>; -defm MVE_VMLADAVu8 : MVE_VMLADAV_multi<"u8", 0b0, 0b1, 0b1>; +defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 0b0, 0b1>; +defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>; +defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>; // vmlav aliases vmladav -foreach acc = ["_acc", "_noacc"] in { +foreach acc = ["", "a"] in { foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32"] in { - def : MVEInstAlias<!strconcat("vmlav", !if(!eq(acc, "_acc"), "a", ""), - "${vp}.", suffix, "\t$RdaDest, $Qn, $Qm"), - (!cast<Instruction>("MVE_VMLADAV"#suffix#acc#"_noexch") + def : MVEInstAlias<"vmlav"#acc#"${vp}."#suffix#"\t$RdaDest, $Qn, $Qm", + (!cast<Instruction>("MVE_VMLADAV"#acc#suffix) tGPREven:$RdaDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; } } -multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28, - list<dag> pattern=[]> { - defm "" : MVE_VMLAMLSDAV_XA<"vmlsdav", suffix, sz, bit_28, 0b0, 0b1, pattern>; -} - -defm MVE_VMLSDAVs8 : MVE_VMLSDAV_multi<"s8", 0, 0b1>; -defm MVE_VMLSDAVs16 : MVE_VMLSDAV_multi<"s16", 0, 0b0>; -defm MVE_VMLSDAVs32 : MVE_VMLSDAV_multi<"s32", 1, 0b0>; - // Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr, bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0, @@ -742,82 +805,83 @@ class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr, let Inst{0} = bit_0; } -multiclass MVE_VMLALDAVBase_X<string iname, string suffix, dag iops, - string cstr, bit sz, bit bit_28, bit A, - bit bit_8, bit bit_0, list<dag> pattern=[]> { - def _noexch : MVE_VMLALDAVBase<iname, suffix, iops, cstr, sz, - bit_28, A, 0b0, bit_8, bit_0, pattern>; - def _exch : MVE_VMLALDAVBase<iname # "x", suffix, iops, cstr, sz, - bit_28, A, 0b1, bit_8, bit_0, pattern>; +multiclass MVE_VMLALDAVBase_A<string 
iname, string x, string suffix, + bit sz, bit bit_28, bit X, bit bit_8, bit bit_0, + list<dag> pattern=[]> { + def ""#x#suffix : MVE_VMLALDAVBase< + iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", + sz, bit_28, 0b0, X, bit_8, bit_0, pattern>; + def "a"#x#suffix : MVE_VMLALDAVBase< + iname # "a" # x, suffix, + (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm), + "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc", + sz, bit_28, 0b1, X, bit_8, bit_0, pattern>; } -multiclass MVE_VMLALDAVBase_XA<string iname, string suffix, bit sz, bit bit_28, - bit bit_8, bit bit_0, list<dag> pattern=[]> { - defm _noacc : MVE_VMLALDAVBase_X< - iname, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", - sz, bit_28, 0b0, bit_8, bit_0, pattern>; - defm _acc : MVE_VMLALDAVBase_X< - iname # "a", suffix, (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, - MQPR:$Qn, MQPR:$Qm), - "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc", - sz, bit_28, 0b1, bit_8, bit_0, pattern>; + +multiclass MVE_VMLALDAVBase_AX<string iname, string suffix, bit sz, bit bit_28, + bit bit_8, bit bit_0, list<dag> pattern=[]> { + defm "" : MVE_VMLALDAVBase_A<iname, "", suffix, sz, + bit_28, 0b0, bit_8, bit_0, pattern>; + defm "" : MVE_VMLALDAVBase_A<iname, "x", suffix, sz, + bit_28, 0b1, bit_8, bit_0, pattern>; } -multiclass MVE_VRMLALDAVH_multi<string suffix, bit U, list<dag> pattern=[]> { - defm "" : MVE_VMLALDAVBase_XA< - "vrmlaldavh", suffix, 0b0, U, 0b1, 0b0, pattern>; +multiclass MVE_VRMLALDAVH_multi<string suffix, list<dag> pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix, + 0b0, 0b0, 0b1, 0b0, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix, + 0b0, 0b1, 0b0, 0b1, 0b0, pattern>; } -defm MVE_VRMLALDAVHs32 : MVE_VRMLALDAVH_multi<"s32", 0>; -defm MVE_VRMLALDAVHu32 : MVE_VRMLALDAVH_multi<"u32", 1>; +defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">; // vrmlalvh aliases for vrmlaldavh def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHs32_noacc_noexch + 
(MVE_VRMLALDAVHs32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; def : MVEInstAlias<"vrmlalvha${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHs32_acc_noexch + (MVE_VRMLALDAVHas32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; def : MVEInstAlias<"vrmlalvh${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHu32_noacc_noexch + (MVE_VRMLALDAVHu32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm", - (MVE_VRMLALDAVHu32_acc_noexch + (MVE_VRMLALDAVHau32 tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; -multiclass MVE_VMLALDAV_multi<string suffix, bit sz, bit U, - list<dag> pattern=[]> { - defm "" : MVE_VMLALDAVBase_XA<"vmlaldav", suffix, sz, U, 0b0, 0b0, pattern>; +multiclass MVE_VMLALDAV_multi<string suffix, bit sz, list<dag> pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix, + sz, 0b1, 0b0, 0b0, 0b0, pattern>; } -defm MVE_VMLALDAVs16 : MVE_VMLALDAV_multi<"s16", 0b0, 0b0>; -defm MVE_VMLALDAVs32 : MVE_VMLALDAV_multi<"s32", 0b1, 0b0>; -defm MVE_VMLALDAVu16 : MVE_VMLALDAV_multi<"u16", 0b0, 0b1>; -defm MVE_VMLALDAVu32 : MVE_VMLALDAV_multi<"u32", 0b1, 0b1>; +defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>; +defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>; // vmlalv aliases vmlaldav -foreach acc = ["_acc", "_noacc"] in { +foreach acc = ["", "a"] in { foreach suffix = ["s16", "s32", "u16", "u32"] in { - def : MVEInstAlias<!strconcat("vmlalv", !if(!eq(acc, "_acc"), "a", ""), - "${vp}.", suffix, "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm"), - (!cast<Instruction>("MVE_VMLALDAV"#suffix#acc#"_noexch") + def : MVEInstAlias<"vmlalv" # acc # "${vp}." 
# suffix # + "\t$RdaLoDest, $RdaHiDest, $Qn, $Qm", + (!cast<Instruction>("MVE_VMLALDAV"#acc#suffix) tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; } } multiclass MVE_VMLSLDAV_multi<string iname, string suffix, bit sz, - bit bit_28, list<dag> pattern=[]> { - defm "" : MVE_VMLALDAVBase_XA<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>; + bit bit_28, list<dag> pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>; } -defm MVE_VMLSLDAVs16 : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>; -defm MVE_VMLSLDAVs32 : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>; -defm MVE_VRMLSLDAVHs32 : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>; +defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>; // end of mve_rDest instructions @@ -967,11 +1031,12 @@ def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), let Inst{6} = 0b1; let Inst{4} = 0b1; let Inst{0} = 0b0; + let validForTailPredication = 1; } -class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7> +class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7, string cstr=""> : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname, - suffix, "$Qd, $Qm", ""> { + suffix, "$Qd, $Qm", cstr> { let Inst{28} = 0b1; let Inst{25-23} = 0b111; @@ -985,9 +1050,9 @@ class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7> let Inst{0} = 0b0; } -def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00>; -def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00>; -def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00>; +def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">; +def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">; +def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">; 
def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>; def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>; @@ -995,6 +1060,13 @@ def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>; def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>; let Predicates = [HasMVEInt] in { + def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))), + (v8i16 (MVE_VREV16_8 (v8i16 MQPR:$src)))>; + def : Pat<(v4i32 (bswap (v4i32 MQPR:$src))), + (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>; +} + +let Predicates = [HasMVEInt] in { def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))), (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>; def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))), @@ -1026,6 +1098,7 @@ def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), let Inst{12-6} = 0b0010111; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } let Predicates = [HasMVEInt] in { @@ -1054,6 +1127,7 @@ class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28> let Inst{6} = 0b1; let Inst{4} = 0b1; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>; @@ -1145,6 +1219,7 @@ class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps> class MVE_VORR<string suffix, bits<4> cmode, ExpandImm imm_type> : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> { let Inst{5} = 0b0; + let validForTailPredication = 1; } def MVE_VORRIZ0v4i32 : MVE_VORR<"i32", 0b0001, expzero00>; @@ -1173,6 +1248,7 @@ def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm", class MVE_VBIC<string suffix, bits<4> cmode, ExpandImm imm_type> : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> { let Inst{5} = 0b1; + let validForTailPredication = 1; } def MVE_VBICIZ0v4i32 : MVE_VBIC<"i32", 0b0001, expzero00>; @@ -1315,8 +1391,12 @@ let Predicates = [HasMVEInt] in { def : Pat<(insertelt (v8f16 MQPR:$src1), HPR:$src2, imm:$lane), (MVE_VMOV_to_lane_16 MQPR:$src1, (COPY_TO_REGCLASS HPR:$src2, rGPR), imm:$lane)>; - def : 
Pat<(extractelt (v8f16 MQPR:$src), imm:$lane), - (COPY_TO_REGCLASS (MVE_VMOV_from_lane_u16 MQPR:$src, imm:$lane), HPR)>; + def : Pat<(extractelt (v8f16 MQPR:$src), imm_even:$lane), + (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_even:$lane))>; + def : Pat<(extractelt (v8f16 MQPR:$src), imm_odd:$lane), + (COPY_TO_REGCLASS + (VMOVH (EXTRACT_SUBREG MQPR:$src, (SSubReg_f16_reg imm_odd:$lane))), + HPR)>; def : Pat<(v4f32 (scalar_to_vector SPR:$src)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; @@ -1408,6 +1488,7 @@ class MVE_VADDSUB<string iname, string suffix, bits<2> size, bit subtract, let Inst{12-8} = 0b01000; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } class MVE_VADD<string suffix, bits<2> size, list<dag> pattern=[]> @@ -1442,8 +1523,8 @@ let Predicates = [HasMVEInt] in { } class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract, - bits<2> size, list<dag> pattern=[]> - : MVE_int<iname, suffix, size, pattern> { + bits<2> size, ValueType vt> + : MVE_int<iname, suffix, size, []> { let Inst{28} = U; let Inst{25-23} = 0b110; @@ -1453,26 +1534,49 @@ class MVE_VQADDSUB<string iname, string suffix, bit U, bit subtract, let Inst{8} = 0b0; let Inst{4} = 0b1; let Inst{0} = 0b0; + let validForTailPredication = 1; + + ValueType VT = vt; } -class MVE_VQADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]> - : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, pattern>; -class MVE_VQSUB<string suffix, bit U, bits<2> size, list<dag> pattern=[]> - : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, pattern>; +class MVE_VQADD<string suffix, bit U, bits<2> size, ValueType VT> + : MVE_VQADDSUB<"vqadd", suffix, U, 0b0, size, VT>; +class MVE_VQSUB<string suffix, bit U, bits<2> size, ValueType VT> + : MVE_VQADDSUB<"vqsub", suffix, U, 0b1, size, VT>; -def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00>; -def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01>; -def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10>; -def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 
0b00>; -def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01>; -def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10>; +def MVE_VQADDs8 : MVE_VQADD<"s8", 0b0, 0b00, v16i8>; +def MVE_VQADDs16 : MVE_VQADD<"s16", 0b0, 0b01, v8i16>; +def MVE_VQADDs32 : MVE_VQADD<"s32", 0b0, 0b10, v4i32>; +def MVE_VQADDu8 : MVE_VQADD<"u8", 0b1, 0b00, v16i8>; +def MVE_VQADDu16 : MVE_VQADD<"u16", 0b1, 0b01, v8i16>; +def MVE_VQADDu32 : MVE_VQADD<"u32", 0b1, 0b10, v4i32>; + +def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00, v16i8>; +def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01, v8i16>; +def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10, v4i32>; +def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00, v16i8>; +def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01, v8i16>; +def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10, v4i32>; + +let Predicates = [HasMVEInt] in { + foreach instr = [MVE_VQADDu8, MVE_VQADDu16, MVE_VQADDu32] in + foreach VT = [instr.VT] in + def : Pat<(VT (uaddsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQADDs8, MVE_VQADDs16, MVE_VQADDs32] in + foreach VT = [instr.VT] in + def : Pat<(VT (saddsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQSUBu8, MVE_VQSUBu16, MVE_VQSUBu32] in + foreach VT = [instr.VT] in + def : Pat<(VT (usubsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; + foreach instr = [MVE_VQSUBs8, MVE_VQSUBs16, MVE_VQSUBs32] in + foreach VT = [instr.VT] in + def : Pat<(VT (ssubsat (VT MQPR:$Qm), (VT MQPR:$Qn))), + (VT (instr (VT MQPR:$Qm), (VT MQPR:$Qn)))>; +} -def MVE_VQSUBs8 : MVE_VQSUB<"s8", 0b0, 0b00>; -def MVE_VQSUBs16 : MVE_VQSUB<"s16", 0b0, 0b01>; -def MVE_VQSUBs32 : MVE_VQSUB<"s32", 0b0, 0b10>; -def MVE_VQSUBu8 : MVE_VQSUB<"u8", 0b1, 0b00>; -def MVE_VQSUBu16 : MVE_VQSUB<"u16", 0b1, 0b01>; -def MVE_VQSUBu32 : MVE_VQSUB<"u32", 0b1, 0b10>; class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]> : MVE_int<"vabd", suffix, size, pattern> 
{ @@ -1483,6 +1587,7 @@ class MVE_VABD_int<string suffix, bit U, bits<2> size, list<dag> pattern=[]> let Inst{12-8} = 0b00111; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; @@ -1501,6 +1606,7 @@ class MVE_VRHADD<string suffix, bit U, bits<2> size, list<dag> pattern=[]> let Inst{12-8} = 0b00001; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VRHADDs8 : MVE_VRHADD<"s8", 0b0, 0b00>; @@ -1522,6 +1628,7 @@ class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract, let Inst{8} = 0b0; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } class MVE_VHADD<string suffix, bit U, bits<2> size, @@ -1545,6 +1652,60 @@ def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>; def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>; def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (ARMvshrsImm + (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHADDs8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshrsImm + (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHADDs16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshrsImm + (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHADDs32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; + + def : Pat<(v16i8 (ARMvshruImm + (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHADDu8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshruImm + (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHADDu16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshruImm + (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHADDu32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; + + def : Pat<(v16i8 (ARMvshrsImm + (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHSUBs8 + (v16i8 MQPR:$v1), (v16i8 
MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshrsImm + (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHSUBs16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshrsImm + (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHSUBs32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; + + def : Pat<(v16i8 (ARMvshruImm + (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), + (v16i8 (MVE_VHSUBu8 + (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; + def : Pat<(v8i16 (ARMvshruImm + (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), + (v8i16 (MVE_VHSUBu16 + (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; + def : Pat<(v4i32 (ARMvshruImm + (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), + (v4i32 (MVE_VHSUBu32 + (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; +} + class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary, "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> { @@ -1563,6 +1724,7 @@ class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]> let Inst{6} = 0b0; let Inst{5} = E; let Inst{4-0} = 0b10000; + let validForTailPredication = 1; } def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>; @@ -1625,6 +1787,7 @@ class MVE_VCLSCLZ<string iname, string suffix, bits<2> size, let Inst{6} = 0b1; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>; @@ -1635,6 +1798,15 @@ def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>; def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>; def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))), + (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>; + def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))), + (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>; + def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))), + (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>; +} + class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit 
negate, list<dag> pattern=[]> : MVEIntSingleSrc<iname, suffix, size, pattern> { @@ -1648,6 +1820,7 @@ class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate, let Inst{6} = 0b1; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>; @@ -1689,6 +1862,7 @@ class MVE_VQABSNEG<string iname, string suffix, bits<2> size, let Inst{6} = 0b1; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>; @@ -1720,6 +1894,7 @@ class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op, let Inst{3-0} = imm{3-0}; let DecoderMethod = "DecodeMVEModImmInstruction"; + let validForTailPredication = 1; } let isReMaterializable = 1 in { @@ -2115,6 +2290,7 @@ class MVE_shift_by_vec<string iname, string suffix, bit U, let Inst{4} = bit_4; let Inst{3-1} = Qm{2-0}; let Inst{0} = 0b0; + let validForTailPredication = 1; } multiclass mve_shift_by_vec_multi<string iname, bit bit_4, bit bit_8> { @@ -2163,6 +2339,7 @@ class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops, let Inst{4} = 0b1; let Inst{3-1} = Qm{2-0}; let Inst{0} = 0b0; + let validForTailPredication = 1; } class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm> @@ -2175,6 +2352,7 @@ class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm> let Inst{21-16} = imm; let Inst{10-9} = 0b10; let Inst{8} = bit_8; + let validForTailPredication = 1; } def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> { @@ -2427,6 +2605,7 @@ class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size, let Inst{11-10} = 0b01; let Inst{9-7} = op{2-0}; let Inst{4} = 0b0; + let validForTailPredication = 1; } @@ -2489,6 +2668,7 @@ class MVE_VMUL_fp<string suffix, bit size, list<dag> pattern=[]> let Inst{12-8} = 0b01101; let Inst{7} = Qn{3}; let Inst{4} = 0b1; + let validForTailPredication = 1; } def MVE_VMULf32 : 
MVE_VMUL_fp<"f32", 0b0>; @@ -2556,8 +2736,38 @@ def MVE_VFMSf32 : MVE_VADDSUBFMA_fp<"vfms", "f32", 0b0, 0b1, 0b0, 0b1, def MVE_VFMSf16 : MVE_VADDSUBFMA_fp<"vfms", "f16", 0b1, 0b1, 0b0, 0b1, (ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">; -def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>; -def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>; +let Predicates = [HasMVEFloat, UseFusedMAC] in { + def : Pat<(v8f16 (fadd (v8f16 MQPR:$src1), + (fmul (v8f16 MQPR:$src2), + (v8f16 MQPR:$src3)))), + (v8f16 (MVE_VFMAf16 $src1, $src2, $src3))>; + def : Pat<(v4f32 (fadd (v4f32 MQPR:$src1), + (fmul (v4f32 MQPR:$src2), + (v4f32 MQPR:$src3)))), + (v4f32 (MVE_VFMAf32 $src1, $src2, $src3))>; + + def : Pat<(v8f16 (fsub (v8f16 MQPR:$src1), + (fmul (v8f16 MQPR:$src2), + (v8f16 MQPR:$src3)))), + (v8f16 (MVE_VFMSf16 $src1, $src2, $src3))>; + def : Pat<(v4f32 (fsub (v4f32 MQPR:$src1), + (fmul (v4f32 MQPR:$src2), + (v4f32 MQPR:$src3)))), + (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>; +} + +let Predicates = [HasMVEFloat] in { + def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))), + (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>; + def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))), + (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>; +} + + +let validForTailPredication = 1 in { + def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>; + def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>; +} let Predicates = [HasMVEFloat] in { def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), @@ -2566,8 +2776,11 @@ let Predicates = [HasMVEFloat] in { (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; } -def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>; -def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>; + +let validForTailPredication = 1 in { + def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>; + 
def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>; +} let Predicates = [HasMVEFloat] in { def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), @@ -2576,10 +2789,10 @@ let Predicates = [HasMVEFloat] in { (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; } -class MVE_VCADD<string suffix, bit size, list<dag> pattern=[]> +class MVE_VCADD<string suffix, bit size, string cstr="", list<dag> pattern=[]> : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qd; bits<4> Qn; bit rot; @@ -2598,7 +2811,7 @@ class MVE_VCADD<string suffix, bit size, list<dag> pattern=[]> } def MVE_VCADDf16 : MVE_VCADD<"f16", 0b0>; -def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1>; +def MVE_VCADDf32 : MVE_VCADD<"f32", 0b1, "@earlyclobber $Qd">; class MVE_VABD_fp<string suffix, bit size> : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), @@ -2617,6 +2830,7 @@ class MVE_VABD_fp<string suffix, bit size> let Inst{11-8} = 0b1101; let Inst{7} = Qn{3}; let Inst{4} = 0b0; + let validForTailPredication = 1; } def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>; @@ -2643,6 +2857,7 @@ class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op, let Inst{4} = 0b1; let DecoderMethod = "DecodeMVEVCVTt1fp"; + let validForTailPredication = 1; } class MVE_VCVT_imm_asmop<int Bits> : AsmOperandClass { @@ -2693,6 +2908,7 @@ class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm, let Inst{9-8} = rm; let Inst{7} = op; let Inst{4} = 0b0; + let validForTailPredication = 1; } multiclass MVE_VCVT_fp_int_anpm_multi<string suffix, bits<2> size, bit op, @@ -2727,6 +2943,7 @@ class MVE_VCVT_fp_int<string suffix, bits<2> size, bits<2> op, let Inst{12-9} = 0b0011; let Inst{8-7} = op; let Inst{4} = 0b0; + let validForTailPredication = 1; } // The unsuffixed VCVT for float->int implicitly rounds toward 
zero, @@ -2776,6 +2993,7 @@ class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate, let Inst{11-8} = 0b0111; let Inst{7} = negate; let Inst{4} = 0b0; + let validForTailPredication = 1; } def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>; @@ -2863,6 +3081,7 @@ class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20, // decoder to emit an operand that isn't affected by any instruction // bit. let DecoderMethod = "DecodeMVEVCMP<false," # predtype.DecoderMethod # ">"; + let validForTailPredication = 1; } class MVE_VCMPqqf<string suffix, bit size> @@ -2927,6 +3146,7 @@ class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20, let Constraints = ""; // Custom decoder method, for the same reason as MVE_VCMPqq let DecoderMethod = "DecodeMVEVCMP<true," # predtype.DecoderMethod # ">"; + let validForTailPredication = 1; } class MVE_VCMPqrf<string suffix, bit size> @@ -2966,6 +3186,168 @@ def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>; def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>; def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>; +multiclass unpred_vcmp_z<string suffix, int fc> { + def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))), + (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>; + def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))), + (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>; + def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))), + (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))), + (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))), + (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 
fc))))), + (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; +} + +multiclass unpred_vcmp_r<string suffix, int fc> { + def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))), + (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>; + def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))), + (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>; + def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))), + (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>; + + def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))), + (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>; + def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))), + (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>; + def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))), + (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))), + (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))), + (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))), + (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>; + + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))), + 
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))), + (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))), + (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; +} + +multiclass unpred_vcmpf_z<int fc> { + def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>; + def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>; + + /* Predicated variants: (and $p1, compare-with-zero) is selected as the same compare with the extra operands "1, VCCR:$p1". The v8f16 form must select the f16 instruction, matching the unpredicated f16 pattern above. */ def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; +} + +multiclass unpred_vcmpf_r<int fc> { + def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))), + (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>; + def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))), + (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>; + + def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>; + def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>; + + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))), + (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>; + 
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))), + (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>; + + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; +} + +let Predicates = [HasMVEInt] in { + defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>; + defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>; + defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>; + defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>; + defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>; + defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>; + defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>; + defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>; + + defm MVE_VCEQ : unpred_vcmp_r<"i", 0>; + defm MVE_VCNE : unpred_vcmp_r<"i", 1>; + defm MVE_VCGE : unpred_vcmp_r<"s", 10>; + defm MVE_VCLT : unpred_vcmp_r<"s", 11>; + defm MVE_VCGT : unpred_vcmp_r<"s", 12>; + defm MVE_VCLE : unpred_vcmp_r<"s", 13>; + defm MVE_VCGTU : unpred_vcmp_r<"u", 8>; + defm MVE_VCGEU : unpred_vcmp_r<"u", 2>; +} + +let Predicates = [HasMVEFloat] in { + defm MVE_VFCEQZ : unpred_vcmpf_z<0>; + defm MVE_VFCNEZ : unpred_vcmpf_z<1>; + defm MVE_VFCGEZ : unpred_vcmpf_z<10>; + defm MVE_VFCLTZ : unpred_vcmpf_z<11>; + defm MVE_VFCGTZ : unpred_vcmpf_z<12>; + defm MVE_VFCLEZ : unpred_vcmpf_z<13>; + + defm MVE_VFCEQ : unpred_vcmpf_r<0>; + defm MVE_VFCNE : unpred_vcmpf_r<1>; + defm MVE_VFCGE : unpred_vcmpf_r<10>; + defm MVE_VFCLT : unpred_vcmpf_r<11>; + defm MVE_VFCGT : unpred_vcmpf_r<12>; + defm MVE_VFCLE : unpred_vcmpf_r<13>; +} + + +// Extra "worst case" and/or/xor patterns, going into and out of GPR +multiclass two_predops<SDPatternOperator opnode, Instruction insn> 
{ + def v16i1 : Pat<(v16i1 (opnode (v16i1 VCCR:$p1), (v16i1 VCCR:$p2))), + (v16i1 (COPY_TO_REGCLASS + (insn (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p1), rGPR)), + (i32 (COPY_TO_REGCLASS (v16i1 VCCR:$p2), rGPR))), + VCCR))>; + def v8i1 : Pat<(v8i1 (opnode (v8i1 VCCR:$p1), (v8i1 VCCR:$p2))), + (v8i1 (COPY_TO_REGCLASS + (insn (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p1), rGPR)), + (i32 (COPY_TO_REGCLASS (v8i1 VCCR:$p2), rGPR))), + VCCR))>; + def v4i1 : Pat<(v4i1 (opnode (v4i1 VCCR:$p1), (v4i1 VCCR:$p2))), + (v4i1 (COPY_TO_REGCLASS + (insn (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p1), rGPR)), + (i32 (COPY_TO_REGCLASS (v4i1 VCCR:$p2), rGPR))), + VCCR))>; +} + +let Predicates = [HasMVEInt] in { + defm POR : two_predops<or, t2ORRrr>; + defm PAND : two_predops<and, t2ANDrr>; + defm PEOR : two_predops<xor, t2EORrr>; +} + +// Occasionally we need to cast between an i32 and a boolean vector, for +// example when moving between rGPR and VPR.P0 as part of predicate vector +// shuffles. We also sometimes need to cast between different predicate +// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles. 
+ +def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>; + +let Predicates = [HasMVEInt] in { + foreach VT = [ v4i1, v8i1, v16i1 ] in { + def : Pat<(i32 (predicate_cast (VT VCCR:$src))), + (i32 (COPY_TO_REGCLASS (VT VCCR:$src), VCCR))>; + def : Pat<(VT (predicate_cast (i32 VCCR:$src))), + (VT (COPY_TO_REGCLASS (i32 VCCR:$src), VCCR))>; + + foreach VT2 = [ v4i1, v8i1, v16i1 ] in + def : Pat<(VT (predicate_cast (VT2 VCCR:$src))), + (VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>; + } +} + // end of MVE compares // start of MVE_qDest_qSrc @@ -2989,10 +3371,10 @@ class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops, } class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract, - string suffix, bits<2> size, list<dag> pattern=[]> + string suffix, bits<2> size, string cstr="", list<dag> pattern=[]> : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", - vpred_n, "$Qd = $Qd_src", pattern> { + vpred_n, "$Qd = $Qd_src"#cstr, pattern> { bits<4> Qn; let Inst{28} = subtract; @@ -3009,7 +3391,7 @@ multiclass MVE_VQxDMLxDH_multi<string iname, bit exch, bit round, bit subtract> { def s8 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s8", 0b00>; def s16 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s16", 0b01>; - def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10>; + def s32 : MVE_VQxDMLxDH<iname, exch, round, subtract, "s32", 0b10, ",@earlyclobber $Qd">; } defm MVE_VQDMLADH : MVE_VQxDMLxDH_multi<"vqdmladh", 0b0, 0b0, 0b0>; @@ -3021,10 +3403,10 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>; defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>; defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>; -class MVE_VCMUL<string iname, string suffix, bit size, list<dag> pattern=[]> +class MVE_VCMUL<string iname, string suffix, bit size, string cstr="", list<dag> pattern=[]> : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins 
MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_r, "", pattern> { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qn; bits<2> rot; @@ -3041,13 +3423,13 @@ class MVE_VCMUL<string iname, string suffix, bit size, list<dag> pattern=[]> } def MVE_VCMULf16 : MVE_VCMUL<"vcmul", "f16", 0b0>; -def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1>; +def MVE_VCMULf32 : MVE_VCMUL<"vcmul", "f32", 0b1, "@earlyclobber $Qd">; class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20, - bit T, list<dag> pattern=[]> + bit T, string cstr, list<dag> pattern=[]> : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", - vpred_r, "", pattern> { + vpred_r, cstr, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -3063,9 +3445,9 @@ class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20, } multiclass MVE_VMULL_multi<string iname, string suffix, - bit bit_28, bits<2> bits_21_20> { - def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0>; - def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1>; + bit bit_28, bits<2> bits_21_20, string cstr=""> { + def bh : MVE_VMULL<iname # "b", suffix, bit_28, bits_21_20, 0b0, cstr>; + def th : MVE_VMULL<iname # "t", suffix, bit_28, bits_21_20, 0b1, cstr>; } // For integer multiplies, bits 21:20 encode size, and bit 28 signedness. 
@@ -3074,10 +3456,10 @@ multiclass MVE_VMULL_multi<string iname, string suffix, defm MVE_VMULLs8 : MVE_VMULL_multi<"vmull", "s8", 0b0, 0b00>; defm MVE_VMULLs16 : MVE_VMULL_multi<"vmull", "s16", 0b0, 0b01>; -defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10>; +defm MVE_VMULLs32 : MVE_VMULL_multi<"vmull", "s32", 0b0, 0b10, "@earlyclobber $Qd">; defm MVE_VMULLu8 : MVE_VMULL_multi<"vmull", "u8", 0b1, 0b00>; defm MVE_VMULLu16 : MVE_VMULL_multi<"vmull", "u16", 0b1, 0b01>; -defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10>; +defm MVE_VMULLu32 : MVE_VMULL_multi<"vmull", "u32", 0b1, 0b10, "@earlyclobber $Qd">; defm MVE_VMULLp8 : MVE_VMULL_multi<"vmull", "p8", 0b0, 0b11>; defm MVE_VMULLp16 : MVE_VMULL_multi<"vmull", "p16", 0b1, 0b11>; @@ -3144,6 +3526,18 @@ defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>; defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>; defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>; +def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>; +let Predicates = [HasMVEInt] in { + def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))), + (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; + def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))), + (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; + def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))), + (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; + def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))), + (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; +} + class MVE_VCVT_ff<string iname, string suffix, bit op, bit T, list<dag> pattern=[]> : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm), @@ -3166,11 +3560,10 @@ defm MVE_VCVTf16f32 : MVE_VCVT_ff_halves<"f16.f32", 0b0>; defm MVE_VCVTf32f16 : MVE_VCVT_ff_halves<"f32.f16", 0b1>; 
class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve, - list<dag> pattern=[]> + string cstr="", list<dag> pattern=[]> : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_r, "", - pattern> { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, pattern> { bits<4> Qn; bit rot; @@ -3186,11 +3579,11 @@ class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve, def MVE_VCADDi8 : MVE_VxCADD<"vcadd", "i8", 0b00, 0b1>; def MVE_VCADDi16 : MVE_VxCADD<"vcadd", "i16", 0b01, 0b1>; -def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1>; +def MVE_VCADDi32 : MVE_VxCADD<"vcadd", "i32", 0b10, 0b1, "@earlyclobber $Qd">; def MVE_VHCADDs8 : MVE_VxCADD<"vhcadd", "s8", 0b00, 0b0>; def MVE_VHCADDs16 : MVE_VxCADD<"vhcadd", "s16", 0b01, 0b0>; -def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0>; +def MVE_VHCADDs32 : MVE_VxCADD<"vhcadd", "s32", 0b10, 0b0, "@earlyclobber $Qd">; class MVE_VADCSBC<string iname, bit I, bit subtract, dag carryin, list<dag> pattern=[]> @@ -3220,10 +3613,10 @@ def MVE_VSBC : MVE_VADCSBC<"vsbc", 0b0, 0b1, (ins cl_FPSCR_NZCV:$carryin)>; def MVE_VSBCI : MVE_VADCSBC<"vsbci", 0b1, 0b1, (ins)>; class MVE_VQDMULL<string iname, string suffix, bit size, bit T, - list<dag> pattern=[]> + string cstr="", list<dag> pattern=[]> : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", - vpred_r, "", pattern> { + vpred_r, cstr, pattern> { bits<4> Qn; let Inst{28} = size; @@ -3236,13 +3629,13 @@ class MVE_VQDMULL<string iname, string suffix, bit size, bit T, let Inst{0} = 0b1; } -multiclass MVE_VQDMULL_halves<string suffix, bit size> { - def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0>; - def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1>; +multiclass MVE_VQDMULL_halves<string suffix, bit size, string cstr=""> { + def bh : MVE_VQDMULL<"vqdmullb", suffix, size, 0b0, cstr>; + def th : MVE_VQDMULL<"vqdmullt", suffix, size, 0b1, cstr>; } defm 
MVE_VQDMULLs16 : MVE_VQDMULL_halves<"s16", 0b0>; -defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1>; +defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<"s32", 0b1, "@earlyclobber $Qd">; // end of mve_qDest_qSrc @@ -3267,9 +3660,9 @@ class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname, let Inst{3-0} = Rm{3-0}; } -class MVE_qDest_rSrc<string iname, string suffix, list<dag> pattern=[]> +class MVE_qDest_rSrc<string iname, string suffix, string cstr="", list<dag> pattern=[]> : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm), - NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, "", + NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr, pattern>; class MVE_qDestSrc_rSrc<string iname, string suffix, list<dag> pattern=[]> @@ -3291,7 +3684,7 @@ class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]> class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size, bit bit_5, bit bit_12, bit bit_16, bit bit_28, list<dag> pattern=[]> - : MVE_qDest_rSrc<iname, suffix, pattern> { + : MVE_qDest_rSrc<iname, suffix, "", pattern> { let Inst{28} = bit_28; let Inst{21-20} = size; @@ -3299,6 +3692,7 @@ class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size, let Inst{12} = bit_12; let Inst{8} = 0b1; let Inst{5} = bit_5; + let validForTailPredication = 1; } multiclass MVE_VADDSUB_qr_sizes<string iname, string suffix, @@ -3320,9 +3714,27 @@ defm MVE_VSUB_qr_i : MVE_VADDSUB_qr_sizes<"vsub", "i", 0b0, 0b1, 0b1, 0b0>; defm MVE_VQSUB_qr_s : MVE_VADDSUB_qr_sizes<"vqsub", "s", 0b1, 0b1, 0b0, 0b0>; defm MVE_VQSUB_qr_u : MVE_VADDSUB_qr_sizes<"vqsub", "u", 0b1, 0b1, 0b0, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (add (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))), + (v16i8 (MVE_VADD_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v8i16 (add (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))), + (v8i16 (MVE_VADD_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v4i32 (add (v4i32 MQPR:$val1), (v4i32 
(ARMvdup GPR:$val2)))), + (v4i32 (MVE_VADD_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>; +} + +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (sub (v16i8 MQPR:$val1), (v16i8 (ARMvdup GPR:$val2)))), + (v16i8 (MVE_VSUB_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v8i16 (sub (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))), + (v8i16 (MVE_VSUB_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v4i32 (sub (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))), + (v4i32 (MVE_VSUB_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>; +} + class MVE_VQDMULL_qr<string iname, string suffix, bit size, - bit T, list<dag> pattern=[]> - : MVE_qDest_rSrc<iname, suffix, pattern> { + bit T, string cstr="", list<dag> pattern=[]> + : MVE_qDest_rSrc<iname, suffix, cstr, pattern> { let Inst{28} = size; let Inst{21-20} = 0b11; @@ -3332,18 +3744,18 @@ class MVE_VQDMULL_qr<string iname, string suffix, bit size, let Inst{5} = 0b1; } -multiclass MVE_VQDMULL_qr_halves<string suffix, bit size> { - def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0>; - def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1>; +multiclass MVE_VQDMULL_qr_halves<string suffix, bit size, string cstr=""> { + def bh : MVE_VQDMULL_qr<"vqdmullb", suffix, size, 0b0, cstr>; + def th : MVE_VQDMULL_qr<"vqdmullt", suffix, size, 0b1, cstr>; } defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<"s16", 0b0>; -defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1>; +defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<"s32", 0b1, "@earlyclobber $Qd">; class MVE_VxADDSUB_qr<string iname, string suffix, bit bit_28, bits<2> bits_21_20, bit subtract, list<dag> pattern=[]> - : MVE_qDest_rSrc<iname, suffix, pattern> { + : MVE_qDest_rSrc<iname, suffix, "", pattern> { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; @@ -3351,6 +3763,7 @@ class MVE_VxADDSUB_qr<string iname, string suffix, let Inst{12} = subtract; let Inst{8} = 0b1; let Inst{5} = 0b0; + let validForTailPredication = 1; } def MVE_VHADD_qr_s8 : 
MVE_VxADDSUB_qr<"vhadd", "s8", 0b0, 0b00, 0b0>; @@ -3388,6 +3801,7 @@ class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size, let Inst{12-8} = 0b11110; let Inst{7} = bit_7; let Inst{6-4} = 0b110; + let validForTailPredication = 1; } multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> { @@ -3421,7 +3835,7 @@ let Predicates = [HasMVEInt] in { } class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]> - : MVE_qDest_rSrc<iname, suffix, pattern> { + : MVE_qDest_rSrc<iname, suffix, "", pattern> { let Inst{28} = 0b1; let Inst{21-20} = size; @@ -3429,15 +3843,27 @@ class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]> let Inst{12} = 0b1; let Inst{8} = 0b0; let Inst{5} = 0b1; + let validForTailPredication = 1; } def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>; def MVE_VBRSR16 : MVE_VBRSR<"vbrsr", "16", 0b01>; def MVE_VBRSR32 : MVE_VBRSR<"vbrsr", "32", 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 ( bitreverse (v16i8 MQPR:$val1))), + (v16i8 ( MVE_VBRSR8 (v16i8 MQPR:$val1), (t2MOVi (i32 8)) ))>; + + def : Pat<(v4i32 ( bitreverse (v4i32 MQPR:$val1))), + (v4i32 ( MVE_VBRSR32 (v4i32 MQPR:$val1), (t2MOVi (i32 32)) ))>; + + def : Pat<(v8i16 ( bitreverse (v8i16 MQPR:$val1))), + (v8i16 ( MVE_VBRSR16 (v8i16 MQPR:$val1), (t2MOVi (i32 16)) ))>; +} + class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]> - : MVE_qDest_rSrc<iname, suffix, pattern> { + : MVE_qDest_rSrc<iname, suffix, "", pattern> { let Inst{28} = 0b0; let Inst{21-20} = size; @@ -3445,15 +3871,25 @@ class MVE_VMUL_qr_int<string iname, string suffix, let Inst{12} = 0b1; let Inst{8} = 0b0; let Inst{5} = 0b1; + let validForTailPredication = 1; } def MVE_VMUL_qr_i8 : MVE_VMUL_qr_int<"vmul", "i8", 0b00>; def MVE_VMUL_qr_i16 : MVE_VMUL_qr_int<"vmul", "i16", 0b01>; def MVE_VMUL_qr_i32 : MVE_VMUL_qr_int<"vmul", "i32", 0b10>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), 
(v16i8 (ARMvdup GPR:$val2)))), + (v16i8 (MVE_VMUL_qr_i8 (v16i8 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 (ARMvdup GPR:$val2)))), + (v8i16 (MVE_VMUL_qr_i16 (v8i16 MQPR:$val1), (i32 GPR:$val2)))>; + def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 (ARMvdup GPR:$val2)))), + (v4i32 (MVE_VMUL_qr_i32 (v4i32 MQPR:$val1), (i32 GPR:$val2)))>; +} + class MVE_VxxMUL_qr<string iname, string suffix, bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]> - : MVE_qDest_rSrc<iname, suffix, pattern> { + : MVE_qDest_rSrc<iname, suffix, "", pattern> { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; @@ -3471,14 +3907,14 @@ def MVE_VQRDMULH_qr_s8 : MVE_VxxMUL_qr<"vqrdmulh", "s8", 0b1, 0b00>; def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>; def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>; -let Predicates = [HasMVEFloat] in { +let Predicates = [HasMVEFloat], validForTailPredication = 1 in { def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>; def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>; } class MVE_VFMAMLA_qr<string iname, string suffix, - bit bit_28, bits<2> bits_21_20, bit S, - list<dag> pattern=[]> + bit bit_28, bits<2> bits_21_20, bit S, + list<dag> pattern=[]> : MVE_qDestSrc_rSrc<iname, suffix, pattern> { let Inst{28} = bit_28; @@ -3487,6 +3923,7 @@ class MVE_VFMAMLA_qr<string iname, string suffix, let Inst{12} = S; let Inst{8} = 0b0; let Inst{5} = 0b0; + let validForTailPredication = 1; } def MVE_VMLA_qr_s8 : MVE_VFMAMLA_qr<"vmla", "s8", 0b0, 0b00, 0b0>; @@ -3503,6 +3940,21 @@ def MVE_VMLAS_qr_u8 : MVE_VFMAMLA_qr<"vmlas", "u8", 0b1, 0b00, 0b1>; def MVE_VMLAS_qr_u16 : MVE_VFMAMLA_qr<"vmlas", "u16", 0b1, 0b01, 0b1>; def MVE_VMLAS_qr_u32 : MVE_VFMAMLA_qr<"vmlas", "u32", 0b1, 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v4i32 (add (v4i32 MQPR:$src1), + (v4i32 (mul (v4i32 MQPR:$src2), + (v4i32 (ARMvdup (i32 rGPR:$x))))))), + (v4i32 (MVE_VMLA_qr_u32 $src1, 
$src2, $x))>; + def : Pat<(v8i16 (add (v8i16 MQPR:$src1), + (v8i16 (mul (v8i16 MQPR:$src2), + (v8i16 (ARMvdup (i32 rGPR:$x))))))), + (v8i16 (MVE_VMLA_qr_u16 $src1, $src2, $x))>; + def : Pat<(v16i8 (add (v16i8 MQPR:$src1), + (v16i8 (mul (v16i8 MQPR:$src2), + (v16i8 (ARMvdup (i32 rGPR:$x))))))), + (v16i8 (MVE_VMLA_qr_u8 $src1, $src2, $x))>; +} + let Predicates = [HasMVEFloat] in { def MVE_VFMA_qr_f16 : MVE_VFMAMLA_qr<"vfma", "f16", 0b1, 0b11, 0b0>; def MVE_VFMA_qr_f32 : MVE_VFMAMLA_qr<"vfma", "f32", 0b0, 0b11, 0b0>; @@ -3555,6 +4007,7 @@ class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12, let Inst{7} = imm{1}; let Inst{6-1} = 0b110111; let Inst{0} = imm{0}; + let validForTailPredication = 1; } def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>; @@ -3589,6 +4042,7 @@ class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12, let Inst{6-4} = 0b110; let Inst{3-1} = Rm{3-1}; let Inst{0} = imm{0}; + let validForTailPredication = 1; } def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>; @@ -3599,6 +4053,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; +let hasSideEffects = 1 in class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { @@ -3614,6 +4069,7 @@ class MVE_VCTP<string suffix, bits<2> size, list<dag> pattern=[]> let Constraints = ""; let DecoderMethod = "DecodeMveVCTP"; + let validForTailPredication = 1; } def MVE_VCTP8 : MVE_VCTP<"8", 0b00>; @@ -3621,6 +4077,15 @@ def MVE_VCTP16 : MVE_VCTP<"16", 0b01>; def MVE_VCTP32 : MVE_VCTP<"32", 0b10>; def MVE_VCTP64 : MVE_VCTP<"64", 0b11>; +let Predicates = [HasMVEInt] in { + def : Pat<(int_arm_vctp8 rGPR:$Rn), + (v16i1 (MVE_VCTP8 rGPR:$Rn))>; + def : Pat<(int_arm_vctp16 rGPR:$Rn), + (v8i1 (MVE_VCTP16 rGPR:$Rn))>; + def : Pat<(int_arm_vctp32 
rGPR:$Rn), + (v4i1 (MVE_VCTP32 rGPR:$Rn))>; +} + // end of mve_qDest_rSrc // start of coproc mov @@ -3863,6 +4328,7 @@ class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc, let mayLoad = dir.load; let mayStore = !eq(dir.load,0); + let validForTailPredication = 1; } // Contiguous load and store instructions. These come in two main @@ -4165,7 +4631,8 @@ class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> patte let Inst{7} = fc{0}; let Inst{4} = 0b0; - let Defs = [VPR, P0]; + let Defs = [VPR]; + let validForTailPredication = 1; } class MVE_VPTt1<string suffix, bits<2> size, dag iops> @@ -4177,11 +4644,12 @@ class MVE_VPTt1<string suffix, bits<2> size, dag iops> let Inst{5} = Qm{3}; let Inst{3-1} = Qm{2-0}; let Inst{0} = fc{1}; + let validForTailPredication = 1; } class MVE_VPTt1i<string suffix, bits<2> size> : MVE_VPTt1<suffix, size, - (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, MQPR:$Qm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_i:$fc)> { let Inst{12} = 0b0; let Inst{0} = 0b0; } @@ -4192,7 +4660,7 @@ def MVE_VPTv16i8 : MVE_VPTt1i<"i8", 0b00>; class MVE_VPTt1u<string suffix, bits<2> size> : MVE_VPTt1<suffix, size, - (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, MQPR:$Qm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_u:$fc)> { let Inst{12} = 0b0; let Inst{0} = 0b1; } @@ -4203,7 +4671,7 @@ def MVE_VPTv16u8 : MVE_VPTt1u<"u8", 0b00>; class MVE_VPTt1s<string suffix, bits<2> size> : MVE_VPTt1<suffix, size, - (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, MQPR:$Qm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_s:$fc)> { let Inst{12} = 0b1; } @@ -4225,7 +4693,7 @@ class MVE_VPTt2<string suffix, bits<2> size, dag iops> class MVE_VPTt2i<string suffix, bits<2> size> : MVE_VPTt2<suffix, size, - (ins vpt_mask:$Mk, pred_basic_i:$fc, MQPR:$Qn, GPRwithZR:$Rm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_i:$fc)> { let Inst{12} = 0b0; let Inst{5} = 0b0; } @@ -4236,7 +4704,7 @@ def 
MVE_VPTv16i8r : MVE_VPTt2i<"i8", 0b00>; class MVE_VPTt2u<string suffix, bits<2> size> : MVE_VPTt2<suffix, size, - (ins vpt_mask:$Mk, pred_basic_u:$fc, MQPR:$Qn, GPRwithZR:$Rm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_u:$fc)> { let Inst{12} = 0b0; let Inst{5} = 0b1; } @@ -4247,7 +4715,7 @@ def MVE_VPTv16u8r : MVE_VPTt2u<"u8", 0b00>; class MVE_VPTt2s<string suffix, bits<2> size> : MVE_VPTt2<suffix, size, - (ins vpt_mask:$Mk, pred_basic_s:$fc, MQPR:$Qn, GPRwithZR:$Rm)> { + (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_s:$fc)> { let Inst{12} = 0b1; } @@ -4276,12 +4744,13 @@ class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern= let Inst{7} = fc{0}; let Inst{4} = 0b0; - let Defs = [P0]; + let Defs = [VPR]; let Predicates = [HasMVEFloat]; + let validForTailPredication = 1; } class MVE_VPTft1<string suffix, bit size> - : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, MQPR:$Qm), + : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, MQPR:$Qm, pred_basic_fp:$fc), "$fc, $Qn, $Qm"> { bits<3> fc; bits<4> Qm; @@ -4296,7 +4765,7 @@ def MVE_VPTv4f32 : MVE_VPTft1<"f32", 0b0>; def MVE_VPTv8f16 : MVE_VPTft1<"f16", 0b1>; class MVE_VPTft2<string suffix, bit size> - : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, pred_basic_fp:$fc, MQPR:$Qn, GPRwithZR:$Rm), + : MVE_VPTf<suffix, size, (ins vpt_mask:$Mk, MQPR:$Qn, GPRwithZR:$Rm, pred_basic_fp:$fc), "$fc, $Qn, $Rm"> { bits<3> fc; bits<4> Rm; @@ -4322,7 +4791,8 @@ def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary, let Unpredictable{7} = 0b1; let Unpredictable{5} = 0b1; - let Defs = [P0]; + let Uses = [VPR]; + let validForTailPredication = 1; } def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, @@ -4346,6 +4816,7 @@ def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, let Inst{4} = 0b0; let Inst{3-1} = Qm{2-0}; let Inst{0} = 0b1; + let validForTailPredication = 1; } foreach suffix = ["s8", "s16", "s32", 
"u8", "u16", "u32", @@ -4353,19 +4824,113 @@ foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32", def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm", (MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; -def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary, +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + + def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>; + + def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), + (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>; + def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), + (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), + (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + + def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), + (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), + (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + + // Pred <-> 
Int + def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))), + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))), + (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))), + (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + + def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))), + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))), + (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))), + (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + + def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))), + (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>; + def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))), + (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>; + def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))), + (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; + + def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))), + (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, 1))>; + def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))), + (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, 1))>; + def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))), + (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>; +} + +let Predicates = [HasMVEFloat] in { + // Pred <-> Float + // 112 is 1.0 in float + def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))), + (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), 0, VCCR:$pred))>; + // 2620 in 1.0 in half + def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))), + (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>; + // 240 is -1.0 in float + def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))), + (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), 0, 
VCCR:$pred))>; + // 2748 is -1.0 in half + def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))), + (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>; + + def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; + def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))), + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))), + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; +} + +def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary, "vpnot", "", "", vpred_n, "", []> { let Inst{31-0} = 0b11111110001100010000111101001101; let Unpredictable{19-17} = 0b111; let Unpredictable{12} = 0b1; let Unpredictable{7} = 0b1; let Unpredictable{5} = 0b1; - let Defs = [P0]; - let Uses = [P0]; let Constraints = ""; + let DecoderMethod = "DecodeMVEVPNOT"; } +let Predicates = [HasMVEInt] in { + def : Pat<(v4i1 (xor (v4i1 VCCR:$pred), (v4i1 (predicate_cast (i32 65535))))), + (v4i1 (MVE_VPNOT (v4i1 VCCR:$pred)))>; + def : Pat<(v8i1 (xor (v8i1 VCCR:$pred), (v8i1 (predicate_cast (i32 65535))))), + (v8i1 (MVE_VPNOT (v8i1 VCCR:$pred)))>; + def : Pat<(v16i1 (xor (v16i1 VCCR:$pred), (v16i1 (predicate_cast (i32 65535))))), + (v16i1 (MVE_VPNOT (v16i1 VCCR:$pred)))>; +} + + class MVE_loltp_start<dag iops, string asm, string ops, bits<2> size> : t2LOL<(outs GPRlr:$LR), iops, asm, ops> { bits<4> Rn; @@ -4433,159 +4998,440 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> { // Patterns //===----------------------------------------------------------------------===// -class MVE_unpred_vector_store_typed<ValueType Ty, Instruction RegImmInst, +class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst, + PatFrag StoreKind, int shift> + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr), + (RegImmInst (Ty MQPR:$val), 
t2addrmode_imm7<shift>:$addr)>; +class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst, + PatFrag StoreKind, int shift> + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred)>; + +multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind, + int shift> { + def : MVE_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>; + def : MVE_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>; +} + +class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst, + PatFrag LoadKind, int shift> + : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)), + (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>; +class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst, + PatFrag LoadKind, int shift> + : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))), + (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, (i32 1), VCCR:$pred))>; + +multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind, + int shift> { + def : MVE_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>; + def : MVE_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>; +} + +class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode, PatFrag StoreKind, int 
shift> - : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr), - (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr)>; + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>; -multiclass MVE_unpred_vector_store<Instruction RegImmInst, PatFrag StoreKind, +multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind, int shift> { - def : MVE_unpred_vector_store_typed<v16i8, RegImmInst, StoreKind, shift>; - def : MVE_unpred_vector_store_typed<v8i16, RegImmInst, StoreKind, shift>; - def : MVE_unpred_vector_store_typed<v8f16, RegImmInst, StoreKind, shift>; - def : MVE_unpred_vector_store_typed<v4i32, RegImmInst, StoreKind, shift>; - def : MVE_unpred_vector_store_typed<v4f32, RegImmInst, StoreKind, shift>; - def : MVE_unpred_vector_store_typed<v2i64, RegImmInst, StoreKind, shift>; - def : MVE_unpred_vector_store_typed<v2f64, RegImmInst, StoreKind, shift>; -} - -class MVE_unpred_vector_load_typed<ValueType Ty, Instruction RegImmInst, - PatFrag LoadKind, int shift> - : Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr)), - (Ty (RegImmInst t2addrmode_imm7<shift>:$addr))>; - -multiclass MVE_unpred_vector_load<Instruction RegImmInst, PatFrag LoadKind, - int shift> { - def : MVE_unpred_vector_load_typed<v16i8, RegImmInst, LoadKind, shift>; - def : MVE_unpred_vector_load_typed<v8i16, RegImmInst, LoadKind, shift>; - def : MVE_unpred_vector_load_typed<v8f16, RegImmInst, LoadKind, shift>; - def : MVE_unpred_vector_load_typed<v4i32, RegImmInst, LoadKind, shift>; - def : MVE_unpred_vector_load_typed<v4f32, RegImmInst, LoadKind, shift>; - def : MVE_unpred_vector_load_typed<v2i64, RegImmInst, LoadKind, shift>; - def : MVE_unpred_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>; -} + def : MVE_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>; + def : 
MVE_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>; + def : MVE_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>; +} + +def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (pre_store node:$val, node:$ptr, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (post_store node:$val, node:$ptr, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (pre_store node:$val, node:$ptr, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 2; +}]>; +def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (post_store node:$val, node:$ptr, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 2; +}]>; + + +def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast<MaskedLoadSDNode>(N); + return Ld->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; +}]>; +def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; +}]>; +def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast<MaskedLoadSDNode>(N); + EVT ScalarVT = 
Ld->getMemoryVT().getScalarType(); + return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; +}]>; +def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast<MaskedLoadSDNode>(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2; +}]>; +def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; +}]>; +def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ + return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; +}]>; +def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast<MaskedLoadSDNode>(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; +}]>; +def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + auto *Ld = cast<MaskedLoadSDNode>(N); + EVT ScalarVT = Ld->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4; +}]>; + +def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, node:$pred), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (maskedstore8 node:$val, node:$ptr, node:$pred), [{ + return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); +}]>; +def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, 
node:$ptr, node:$pred), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; + +def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (maskedstore16 node:$val, node:$ptr, node:$pred), [{ + return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); +}]>; +def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, node:$pred), [{ + auto *St = cast<MaskedStoreSDNode>(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; +}]>; let Predicates = [HasMVEInt, IsLE] in { - defm : MVE_unpred_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>; - defm : MVE_unpred_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>; - defm : MVE_unpred_vector_store<MVE_VSTRWU32, alignedstore32, 2>; + // Stores + defm : MVE_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>; + defm : MVE_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>; + defm : MVE_vector_store<MVE_VSTRWU32, alignedstore32, 2>; - defm : MVE_unpred_vector_load<MVE_VLDRBU8, byte_alignedload, 0>; - defm : MVE_unpred_vector_load<MVE_VLDRHU16, hword_alignedload, 1>; - defm : MVE_unpred_vector_load<MVE_VLDRWU32, alignedload32, 2>; + // Loads + defm : MVE_vector_load<MVE_VLDRBU8, byte_alignedload, 0>; + defm : MVE_vector_load<MVE_VLDRHU16, hword_alignedload, 1>; + defm : MVE_vector_load<MVE_VLDRWU32, alignedload32, 2>; - def : Pat<(v16i1 (load t2addrmode_imm7<2>:$addr)), - (v16i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>; - def : Pat<(v8i1 (load t2addrmode_imm7<2>:$addr)), - (v8i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>; - def : Pat<(v4i1 (load t2addrmode_imm7<2>:$addr)), - (v4i1 (VLDR_P0_off t2addrmode_imm7<2>:$addr))>; + // Pre/post inc stores + defm : MVE_vector_offset_store<MVE_VSTRBU8_pre, pre_store, 0>; + defm : MVE_vector_offset_store<MVE_VSTRBU8_post, 
post_store, 0>; + defm : MVE_vector_offset_store<MVE_VSTRHU16_pre, aligned16_pre_store, 1>; + defm : MVE_vector_offset_store<MVE_VSTRHU16_post, aligned16_post_store, 1>; + defm : MVE_vector_offset_store<MVE_VSTRWU32_pre, aligned32_pre_store, 2>; + defm : MVE_vector_offset_store<MVE_VSTRWU32_post, aligned32_post_store, 2>; } let Predicates = [HasMVEInt, IsBE] in { - def : MVE_unpred_vector_store_typed<v16i8, MVE_VSTRBU8, store, 0>; - def : MVE_unpred_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>; - def : MVE_unpred_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>; - def : MVE_unpred_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>; - def : MVE_unpred_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>; - - def : MVE_unpred_vector_load_typed<v16i8, MVE_VLDRBU8, load, 0>; - def : MVE_unpred_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>; - def : MVE_unpred_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>; - def : MVE_unpred_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>; - def : MVE_unpred_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>; + // Aligned Stores + def : MVE_vector_store_typed<v16i8, MVE_VSTRBU8, store, 0>; + def : MVE_vector_store_typed<v8i16, MVE_VSTRHU16, alignedstore16, 1>; + def : MVE_vector_store_typed<v8f16, MVE_VSTRHU16, alignedstore16, 1>; + def : MVE_vector_store_typed<v4i32, MVE_VSTRWU32, alignedstore32, 2>; + def : MVE_vector_store_typed<v4f32, MVE_VSTRWU32, alignedstore32, 2>; + + // Aligned Loads + def : MVE_vector_load_typed<v16i8, MVE_VLDRBU8, load, 0>; + def : MVE_vector_load_typed<v8i16, MVE_VLDRHU16, alignedload16, 1>; + def : MVE_vector_load_typed<v8f16, MVE_VLDRHU16, alignedload16, 1>; + def : MVE_vector_load_typed<v4i32, MVE_VLDRWU32, alignedload32, 2>; + def : MVE_vector_load_typed<v4f32, MVE_VLDRWU32, alignedload32, 2>; + + // Other unaligned loads/stores need to go though a VREV + def : Pat<(v2f64 (load t2addrmode_imm7<0>:$addr)), + (v2f64 (MVE_VREV64_8 
(MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v2i64 (load t2addrmode_imm7<0>:$addr)), + (v2i64 (MVE_VREV64_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v4i32 (load t2addrmode_imm7<0>:$addr)), + (v4i32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v4f32 (load t2addrmode_imm7<0>:$addr)), + (v4f32 (MVE_VREV32_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v8i16 (load t2addrmode_imm7<0>:$addr)), + (v8i16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(v8f16 (load t2addrmode_imm7<0>:$addr)), + (v8f16 (MVE_VREV16_8 (MVE_VLDRBU8 t2addrmode_imm7<0>:$addr)))>; + def : Pat<(store (v2f64 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v2i64 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV64_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v4f32 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV32_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr), + (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + + // Pre/Post inc stores + def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_pre, pre_store, 0>; + def : MVE_vector_offset_store_typed<v16i8, MVE_VSTRBU8_post, post_store, 0>; + def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>; + def : MVE_vector_offset_store_typed<v8i16, MVE_VSTRHU16_post, aligned16_post_store, 1>; + def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>; + def : MVE_vector_offset_store_typed<v8f16, MVE_VSTRHU16_post, aligned16_post_store, 1>; + def : 
MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>; + def : MVE_vector_offset_store_typed<v4i32, MVE_VSTRWU32_post, aligned32_post_store, 2>; + def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>; + def : MVE_vector_offset_store_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_store, 2>; } +let Predicates = [HasMVEInt] in { + // Aligned masked store, shared between LE and BE + def : MVE_vector_maskedstore_typed<v16i8, MVE_VSTRBU8, maskedstore8, 0>; + def : MVE_vector_maskedstore_typed<v8i16, MVE_VSTRHU16, maskedstore16, 1>; + def : MVE_vector_maskedstore_typed<v8f16, MVE_VSTRHU16, maskedstore16, 1>; + def : MVE_vector_maskedstore_typed<v4i32, MVE_VSTRWU32, maskedstore32, 2>; + def : MVE_vector_maskedstore_typed<v4f32, MVE_VSTRWU32, maskedstore32, 2>; + // Truncating stores + def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred), + (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; + // Aligned masked loads + def : MVE_vector_maskedload_typed<v16i8, MVE_VLDRBU8, maskedload8, 0>; + def : MVE_vector_maskedload_typed<v8i16, MVE_VLDRHU16, alignedmaskedload16, 1>; + def : MVE_vector_maskedload_typed<v8f16, MVE_VLDRHU16, alignedmaskedload16, 1>; + def : MVE_vector_maskedload_typed<v4i32, MVE_VLDRWU32, alignedmaskedload32, 2>; + def : MVE_vector_maskedload_typed<v4f32, MVE_VLDRWU32, alignedmaskedload32, 2>; + // Extending masked loads. 
+ def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v8i16 NEONimmAllZerosV))), + (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, + (v4i32 NEONimmAllZerosV))), + (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; +} // Widening/Narrowing Loads/Stores +let MinAlignment = 2 in { + def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr), + (truncstorevi16 node:$val, node:$ptr)>; + def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi16 node:$val, node:$base, node:$offset)>; + def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), + 
(pre_truncstvi16 node:$val, node:$base, node:$offset)>; +} + let Predicates = [HasMVEInt] in { - def : Pat<(truncstorevi8 (v8i16 MQPR:$val), t2addrmode_imm7<1>:$addr), - (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<1>:$addr)>; - def : Pat<(truncstorevi8 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr), - (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<1>:$addr)>; - def : Pat<(truncstorevi16 (v4i32 MQPR:$val), t2addrmode_imm7<2>:$addr), - (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<2>:$addr)>; + def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr), + (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>; + def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr), + (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>; + def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr), + (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>; + + def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + + def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; +} + + +let MinAlignment = 2 in { + def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>; + def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>; + def 
zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>; } multiclass MVEExtLoad<string DestLanes, string DestElemBits, string SrcElemBits, string SrcElemType, - Operand am> { + string Align, Operand am> { def _Any : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits) - (!cast<PatFrag>("extloadvi" # SrcElemBits) am:$addr)), + (!cast<PatFrag>("extloadvi" # SrcElemBits # Align) am:$addr)), (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits) am:$addr)>; def _Z : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits) - (!cast<PatFrag>("zextloadvi" # SrcElemBits) am:$addr)), + (!cast<PatFrag>("zextloadvi" # SrcElemBits # Align) am:$addr)), (!cast<Instruction>("MVE_VLDR" # SrcElemType # "U" # DestElemBits) am:$addr)>; def _S : Pat<(!cast<ValueType>("v" # DestLanes # "i" # DestElemBits) - (!cast<PatFrag>("sextloadvi" # SrcElemBits) am:$addr)), + (!cast<PatFrag>("sextloadvi" # SrcElemBits # Align) am:$addr)), (!cast<Instruction>("MVE_VLDR" # SrcElemType # "S" # DestElemBits) am:$addr)>; } let Predicates = [HasMVEInt] in { - defm : MVEExtLoad<"4", "32", "8", "B", t2addrmode_imm7<1>>; - defm : MVEExtLoad<"8", "16", "8", "B", t2addrmode_imm7<1>>; - defm : MVEExtLoad<"4", "32", "16", "H", t2addrmode_imm7<2>>; + defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>; + defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>; + defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>; } // Bit convert patterns let Predicates = [HasMVEInt] in { - def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v2i64 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v2f64 MQPR:$src))), (v2i64 MQPR:$src)>; - def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v4f32 
MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v4i32 MQPR:$src))), (v4f32 MQPR:$src)>; - def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v8f16 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v8i16 MQPR:$src))), (v8f16 MQPR:$src)>; } let Predicates = [IsLE,HasMVEInt] in { - def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; - def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; - - def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; - def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; - - def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; - def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; - - def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; - def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; - - def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 
(bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>; - def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>; - - def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; - def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; - - def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>; - def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 MQPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 MQPR:$src))), (v2f64 MQPR:$src)>; + + def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 MQPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 MQPR:$src)>; + + def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 MQPR:$src)>; + def : Pat<(v4f32 
(bitconvert (v16i8 MQPR:$src))), (v4f32 MQPR:$src)>; + + def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 MQPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 MQPR:$src)>; + + def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 MQPR:$src)>; + def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 MQPR:$src)>; + + def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 MQPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 MQPR:$src)>; + + def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 MQPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 MQPR:$src)>; +} + +let Predicates = [IsBE,HasMVEInt] in { + def : Pat<(v2f64 (bitconvert (v4f32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v4i32 MQPR:$src))), (v2f64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v8f16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2f64 (bitconvert (v8i16 MQPR:$src))), (v2f64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2f64 
(bitconvert (v16i8 MQPR:$src))), (v2f64 (MVE_VREV64_8 MQPR:$src))>; + + def : Pat<(v2i64 (bitconvert (v4f32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v4i32 MQPR:$src))), (v2i64 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v8f16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v8i16 MQPR:$src))), (v2i64 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v2i64 (bitconvert (v16i8 MQPR:$src))), (v2i64 (MVE_VREV64_8 MQPR:$src))>; + + def : Pat<(v4f32 (bitconvert (v2f64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v2i64 MQPR:$src))), (v4f32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v8f16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v8i16 MQPR:$src))), (v4f32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4f32 (bitconvert (v16i8 MQPR:$src))), (v4f32 (MVE_VREV32_8 MQPR:$src))>; + + def : Pat<(v4i32 (bitconvert (v2f64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v2i64 MQPR:$src))), (v4i32 (MVE_VREV64_32 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v8f16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v8i16 MQPR:$src))), (v4i32 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v4i32 (bitconvert (v16i8 MQPR:$src))), (v4i32 (MVE_VREV32_8 MQPR:$src))>; + + def : Pat<(v8f16 (bitconvert (v2f64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v2i64 MQPR:$src))), (v8f16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v4f32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v4i32 MQPR:$src))), (v8f16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8f16 (bitconvert (v16i8 MQPR:$src))), (v8f16 (MVE_VREV16_8 MQPR:$src))>; + + def : Pat<(v8i16 (bitconvert (v2f64 MQPR:$src))), (v8i16 (MVE_VREV64_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v2i64 MQPR:$src))), (v8i16 (MVE_VREV64_16 
MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v4f32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v4i32 MQPR:$src))), (v8i16 (MVE_VREV32_16 MQPR:$src))>; + def : Pat<(v8i16 (bitconvert (v16i8 MQPR:$src))), (v8i16 (MVE_VREV16_8 MQPR:$src))>; + + def : Pat<(v16i8 (bitconvert (v2f64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v2i64 MQPR:$src))), (v16i8 (MVE_VREV64_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v4f32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v4i32 MQPR:$src))), (v16i8 (MVE_VREV32_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v8f16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>; + def : Pat<(v16i8 (bitconvert (v8i16 MQPR:$src))), (v16i8 (MVE_VREV16_8 MQPR:$src))>; } |