diff options
Diffstat (limited to 'lib/Target/ARM/ARMInstrNEON.td')
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 451 |
1 files changed, 355 insertions, 96 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 896fd0f7850cf..43bd4c21dc395 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -626,7 +626,7 @@ class VLD1D<bits<4> op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VLD1Q<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), @@ -634,7 +634,7 @@ class VLD1Q<bits<4> op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VLD1d8 : VLD1D<{0,0,0,?}, "8">; @@ -655,16 +655,14 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } multiclass VLD1QWB<bits<4> op7_4, string Dt> { @@ -674,16 +672,14 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -703,7 +699,7 @@ class VLD1D3<bits<4> op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D3WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), @@ -712,16 +708,14 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -744,7 +738,7 @@ class VLD1D4<bits<4> op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D4WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), @@ -753,16 +747,14 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -786,7 +778,7 @@ class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; @@ -810,16 +802,14 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), itin, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST2Instruction"; } } @@ -853,7 +843,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; @@ -872,7 +862,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; @@ -912,7 +902,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; @@ -931,7 +921,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; @@ -1348,7 +1338,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), @@ -1357,7 +1346,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { @@ -1369,7 +1357,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), @@ -1378,7 +1365,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1419,7 +1405,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), @@ -1428,7 +1413,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1580,14 +1564,14 @@ class VST1D<bits<4> op7_4, string Dt> IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VST1Q<bits<4> op7_4, string Dt> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; @@ -1608,8 +1592,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), @@ -1617,8 +1600,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } multiclass VST1QWB<bits<4> op7_4, string Dt> { @@ -1628,8 +1610,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), @@ -1637,8 +1618,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -1659,7 +1639,7 @@ class VST1D3<bits<4> op7_4, string Dt> IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D3WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), @@ -1668,8 +1648,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), @@ -1677,8 +1656,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -1704,7 +1682,7 @@ class VST1D4<bits<4> op7_4, string Dt> []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D4WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), @@ -1713,8 +1691,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1722,8 +1699,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -1748,7 +1724,7 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; @@ -1772,16 +1748,14 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST2Instruction"; } } multiclass VST2QWB<bits<4> op7_4, string Dt> { @@ -1791,8 +1765,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1800,8 +1773,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST2Instruction"; } } @@ -1835,7 +1807,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; @@ -1854,7 +1826,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; @@ -1894,7 +1866,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; @@ -1913,7 +1885,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; @@ -2379,6 +2351,40 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; +// Same as above, but not predicated. +class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; + +class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; + +// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). +class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; + +// Same as N2VQIntXnp but with Vd as a src register. +class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<op19_18, op17_16, op10_8, op7, op6, + (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { + let Constraints = "$src = $Vd"; +} + // Narrow 2-register operations. class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -2541,6 +2547,16 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; + class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, @@ -2552,6 +2568,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]> { let isCommutable = 0; } + class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, @@ -2584,6 +2601,29 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; + +// Same as N3VQIntnp but with Vd as a src register. +class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, + Dt, ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn), + (OpTy QPR:$Vm))))]> { + let Constraints = "$src = $Vd"; +} + class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -2834,6 +2874,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> @@ -2889,6 +2930,17 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + +// Same as above, but not predicated. +class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; + class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -3965,12 +4017,18 @@ defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", - int_arm_neon_vaddhn, 1>; +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", int_arm_neon_vraddhn, 1>; +def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Multiply Operations. // VMUL : Vector Multiply (integer, polynomial and floating-point) @@ -4008,6 +4066,17 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfd DPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; +def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfq QPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; + + // VQDMULH : Vector Saturating Doubling Multiply Returning High Half defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, @@ -4053,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", NEONvmulls, 1>; -defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", NEONvmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", - v8i16, v8i8, int_arm_neon_vmullp, 1>; +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "NEONData" in { + defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; + defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; + def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", + v8i16, v8i8, int_arm_neon_vmullp, 1>; + def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, + "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, + Requires<[HasV8, HasCrypto]>; +} defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; @@ -4125,8 +4200,27 @@ defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlal", "s", int_arm_neon_vqdmlal>; -defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; + "vqdmlal", "s", null_frag>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; // VMLS : Vector Multiply Subtract (integer and floating-point) defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, @@ -4182,25 +4276,44 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlsl", "s", int_arm_neon_vqdmlsl>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; + "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; // Fused Vector Multiply Subtract (floating-point) def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; // Match @llvm.fma.* intrinsics def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), @@ -4248,12 +4361,18 @@ defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) -defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", - int_arm_neon_vsubhn, 0>; +defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", int_arm_neon_vrsubhn, 0>; +def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Comparisons. // VCEQ : Vector Compare Equal @@ -4659,6 +4778,18 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +// VMAXNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v2f32, v2f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v4f32, v4f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -4673,6 +4804,18 @@ def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +// VMINNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v2f32, v2f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v4f32, v4f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // Vector Pairwise Operations. // VPADD : Vector Pairwise Add @@ -5015,10 +5158,10 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, // Vector Move Operations. // VMOV : Vector Move (Register) -def : InstAlias<"vmov${p} $Vd, $Vm", - (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -def : InstAlias<"vmov${p} $Vd, $Vm", - (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VMOV : Vector Move (Immediate) @@ -5386,6 +5529,26 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; +// VCVT{A, N, P, M} +multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS, + SDPatternOperator IntU> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; + def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; + def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; + def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; + } +} + +defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; +defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; +defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; +defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; + // VCVT : Vector Convert Between Floating-Point and Fixed-Point. let DecoderMethod = "DecodeVCVTD" in { def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", @@ -5409,6 +5572,25 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; } +def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", + (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", + (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", + (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", + (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", + (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", + (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", + (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", + (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; + + // VCVT : Vector Convert Between Half-Precision and Single-Precision. def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, IIC_VUNAQ, "vcvt", "f16.f32", @@ -5509,8 +5691,9 @@ class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), (Ty DPR:$Vm), imm:$index)))]> { - bits<4> index; - let Inst{11-8} = index{3-0}; + bits<3> index; + let Inst{11} = 0b0; + let Inst{10-8} = index{2-0}; } class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> @@ -5525,14 +5708,14 @@ class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> } def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { - let Inst{11-8} = index{3-0}; + let Inst{10-8} = index{2-0}; } def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { - let Inst{11-9} = index{2-0}; + let Inst{10-9} = index{1-0}; let Inst{8} = 0b0; } def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { - let Inst{11-10} = index{1-0}; + let Inst{10} = index{0}; let Inst{9-8} = 0b00; } def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), @@ -5657,6 +5840,77 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +// VRINT : Vector Rounding +multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + } + + def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"), + (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>; + def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"), + (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>; +} + +defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; +defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; +defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; +defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; +defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; +defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; + +// Cryptography instructions +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "v8Crypto" in { + class AES<string op, bit op7, bit op6, SDPatternOperator Int> + : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int> + : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int> + : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>, + Requires<[HasV8, HasCrypto]>; +} + +def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; +def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; +def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; +def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; + +def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>; +def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; +def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; +def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>; +def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>; +def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>; +def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; +def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; +def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; +def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// @@ -6697,12 +6951,17 @@ def VST4qWB_register_Asm_32 : (ins VecListFourQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -// VMOV takes an optional datatype suffix +// VMOV/VMVN takes an optional datatype suffix defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; + // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. // D-register versions. def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", |