diff options
Diffstat (limited to 'lib/Target/ARM/ARMInstrNEON.td')
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 191 |
1 files changed, 116 insertions, 75 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 806681df102c..60ca92e58041 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -15,22 +15,22 @@ // NEON-specific Operands. //===----------------------------------------------------------------------===// def nModImm : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; } def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; } def nImmSplatI8 : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI8AsmOperand; } def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; } def nImmSplatI16 : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI16AsmOperand; } def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; } def nImmSplatI32 : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI32AsmOperand; } def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } @@ -43,7 +43,7 @@ def nImmSplatNotI32 : Operand<i32> { } def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } def nImmVMOVI32 : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } @@ -62,18 +62,18 @@ class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To> } class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>; } class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>; } def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } def nImmVMOVI32Neg : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmVMOVI32NegAsmOperand; } def nImmVMOVF32 : Operand<i32> { @@ -82,7 +82,7 @@ def nImmVMOVF32 : Operand<i32> { } def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } def nImmSplatI64 : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; + let PrintMethod = "printVMOVModImmOperand"; let ParserMatchClass = nImmSplatI64AsmOperand; } @@ -478,20 +478,8 @@ def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), // NEON-specific DAG Nodes. //===----------------------------------------------------------------------===// -def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; -def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; - -def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; -def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; -def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; -def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; -def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; -def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; -def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; -def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; -def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; -def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; -def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; +def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; +def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>; // Types for vector shift by immediates. The "SHX" version is for long and // narrow operations where the source and destination vectors have different @@ -559,14 +547,14 @@ def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; def NEONimmAllZerosV: PatLeaf<(ARMvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); unsigned EltBits = 0; - uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 32 && EltVal == 0); }]>; def NEONimmAllOnesV: PatLeaf<(ARMvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0)); unsigned EltBits = 0; - uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); + uint64_t EltVal = ARM_AM::decodeVMOVModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 8 && EltVal == 0xff); }]>; @@ -3326,30 +3314,30 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // source operand element sizes of 8, 16 and 32 bits: multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, string opc, string Dt, - string asm, SDNode OpNode> { + string asm, int fc> { // 64-bit vector types. def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, opc, !strconcat(Dt, "8"), asm, "", - [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>; + [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), (i32 fc))))]>; def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, opc, !strconcat(Dt, "16"), asm, "", - [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>; + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), (i32 fc))))]>; def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, opc, !strconcat(Dt, "32"), asm, "", - [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>; + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), (i32 fc))))]>; def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, opc, "f32", asm, "", - [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> { + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), (i32 fc))))]> { let Inst{10} = 1; // overwrite F = 1 } def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, opc, "f16", asm, "", - [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>, + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), (i32 fc))))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } @@ -3358,30 +3346,83 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, opc, !strconcat(Dt, "8"), asm, "", - [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>; + [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), (i32 fc))))]>; def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, opc, !strconcat(Dt, "16"), asm, "", - [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>; + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), (i32 fc))))]>; def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, opc, !strconcat(Dt, "32"), asm, "", - [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>; + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), (i32 fc))))]>; def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, opc, "f32", asm, "", - [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> { + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), (i32 fc))))]> { let Inst{10} = 1; // overwrite F = 1 } def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, opc, "f16", asm, "", - [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>, + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), (i32 fc))))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } } +// Neon 3-register comparisons. +class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 1, op4, + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), (i32 fc))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, + OpcodeStr, Dt, "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), (i32 fc))))]> { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + int fc, bit Commutable = 0> { + // 64-bit vector types. + def v8i8 : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16, + OpcodeStr, !strconcat(Dt, "8"), + v8i8, v8i8, fc, Commutable>; + def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16, + OpcodeStr, !strconcat(Dt, "16"), + v4i16, v4i16, fc, Commutable>; + def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32, + OpcodeStr, !strconcat(Dt, "32"), + v2i32, v2i32, fc, Commutable>; + + // 128-bit vector types. + def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16, + OpcodeStr, !strconcat(Dt, "8"), + v16i8, v16i8, fc, Commutable>; + def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16, + OpcodeStr, !strconcat(Dt, "16"), + v8i16, v8i16, fc, Commutable>; + def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32, + OpcodeStr, !strconcat(Dt, "32"), + v4i32, v4i32, fc, Commutable>; +} + // Neon 2-register vector intrinsics, // element sizes of 8, 16 and 32 bits: @@ -5026,67 +5067,67 @@ def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), // Vector Comparisons. // VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; -def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, - NEONvceq, 1>; -def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, - NEONvceq, 1>; -def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, - NEONvceq, 1>, +defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", 0, 1>; +def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, + 0, 1>; +def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, + 0, 1>; +def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, + 0, 1>, Requires<[HasNEON, HasFullFP16]>; -def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, - NEONvceq, 1>, +def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, + 0, 1>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$Vd, $Vm, #0", NEONvceqz>; + "$Vd, $Vm, #0", 0>; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, - NEONvcge, 0>; -def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, - NEONvcge, 0>; -def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, - NEONvcge, 0>, +defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", 10, 0>; +defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", 2, 0>; +def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, + 10, 0>; +def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, + 10, 0>; +def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, + 10, 0>, Requires<[HasNEON, HasFullFP16]>; -def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, - NEONvcge, 0>, +def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, + 10, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$Vd, $Vm, #0", NEONvcgez>; + "$Vd, $Vm, #0", 10>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$Vd, $Vm, #0", NEONvclez>; + "$Vd, $Vm, #0", 13>; } // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; -def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, - NEONvcgt, 0>; -def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, - NEONvcgt, 0>; -def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, - NEONvcgt, 0>, +defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", 12, 0>; +defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", 8, 0>; +def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, + 12, 0>; +def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, + 12, 0>; +def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, + 12, 0>, Requires<[HasNEON, HasFullFP16]>; -def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, - NEONvcgt, 0>, +def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, + 12, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$Vd, $Vm, #0", NEONvcgtz>; + "$Vd, $Vm, #0", 12>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$Vd, $Vm, #0", NEONvcltz>; + "$Vd, $Vm, #0", 11>; } // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) |