diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/VOP1Instructions.td')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 376 |
1 files changed, 275 insertions, 101 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 48548d8b6722..1d374a9f90ba 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -59,9 +59,9 @@ class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1On let AsmVariantName = AMDGPUAsmVariants.Default; } -class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> : +class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic > : VOP_Real <ps>, - InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, + InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>, SIMCInstr <ps.PseudoInstr, EncodingFamily> { let VALU = 1; @@ -110,13 +110,18 @@ class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { } multiclass VOP1Inst <string opName, VOPProfile P, - SDPatternOperator node = null_frag> { + SDPatternOperator node = null_frag, int VOPDOp = -1> { // We only want to set this on the basic, non-SDWA or DPP forms. 
- defvar should_mov_imm = !eq(opName, "v_mov_b32"); + defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"), + !eq(opName, "v_mov_b64")); let isMoveImm = should_mov_imm in { - def _e32 : VOP1_Pseudo <opName, P>; - def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>; + if !eq(VOPDOp, -1) then + def _e32 : VOP1_Pseudo <opName, P>; + else + // Only for V_MOV_B32 + def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, "v_mov_b32">; + def _e64 : VOP3InstBase <opName, P, node>; } foreach _ = BoolToList<P.HasExtSDWA>.ret in @@ -125,6 +130,11 @@ multiclass VOP1Inst <string opName, VOPProfile P, foreach _ = BoolToList<P.HasExtDPP>.ret in def _dpp : VOP1_DPP_Pseudo <opName, P>; + let SubtargetPredicate = isGFX11Plus in { + foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in + def _e64_dpp : VOP3_DPP_Pseudo <opName, P>; + } // End SubtargetPredicate = isGFX11Plus + def : MnemonicAlias<opName#"_e32", opName>, LetDummies; def : MnemonicAlias<opName#"_e64", opName>, LetDummies; @@ -141,7 +151,9 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> : VOPProfile<[dstVt, srcVt, untyped, untyped]> { let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); + let InsVOP3Base = (ins Src0DPP:$src0, clampmod:$clamp, omod:$omod); let Asm64 = "$vdst, $src0$clamp$omod"; + let AsmVOP3DPPBase = Asm64; let HasModifiers = 0; let HasClamp = 1; @@ -151,6 +163,12 @@ def VOP1_F64_I32 : VOPProfileI2F <f64, i32>; def VOP1_F32_I32 : VOPProfileI2F <f32, i32>; def VOP1_F16_I16 : VOPProfileI2F <f16, i16>; +def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{ + let HasExtVOP3DPP = 0; +} + +// OMod clears exceptions when set. OMod was always an operand, but it's +// now explicitly set. 
class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> : VOPProfile<[dstVt, srcVt, untyped, untyped]> { @@ -165,11 +183,21 @@ def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>; //===----------------------------------------------------------------------===// let VOPAsmPrefer32Bit = 1 in { -defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>; +defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>; +} + +def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> { + let InsVOPDX = (ins Src0RC32:$src0X); + let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X); + let InsVOPDY = (ins Src0RC32:$src0Y); + let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y); } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>; +defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>; + +let SubtargetPredicate = isGFX940Plus in +defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>; } // End isMoveImm = 1 // FIXME: Specify SchedRW for READFIRSTLANE_B32 @@ -282,7 +310,7 @@ defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; } // End TRANS = 1, SchedRW = [WriteTrans32] defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; -defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>; +defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>; defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>; defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>; defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>; @@ -472,7 +500,7 @@ let SubtargetPredicate = isGFX9Only in { } // End SubtargetPredicate = isGFX9Only let SubtargetPredicate = isGFX10Plus in { - defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>; + defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>; let Uses = [M0] in { defm V_MOVRELSD_2_B32 : @@ -498,6 +526,17 @@ def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 
1 let isAsCheapAsAMove = 1; } +let SubtargetPredicate = isGFX11Plus in { + // Restrict src0 to be VGPR + def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS, + getVOP1Pat64<int_amdgcn_permlane64, + VOP_MOVRELS>.ret, + /*VOP1Only=*/ 1>; + defm V_NOT_B16 : VOP1Inst<"v_not_b16", VOP_I16_I16>; + defm V_CVT_I32_I16 : VOP1Inst<"v_cvt_i32_i16", VOP_I32_I16>; + defm V_CVT_U32_U16 : VOP1Inst<"v_cvt_u32_u16", VOP_I16_I16>; +} // End SubtargetPredicate = isGFX11Plus + //===----------------------------------------------------------------------===// // Target-specific instruction encodings. //===----------------------------------------------------------------------===// @@ -517,9 +556,9 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1 let Inst{31-25} = 0x3f; } -class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> : +class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> : VOP1_DPP<op, ps, p, 1>, - SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10> { + SIMCInstr <ps.PseudoInstr, subtarget> { let AssemblerPredicate = HasDPP16; let SubtargetPredicate = HasDPP16; } @@ -539,10 +578,112 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : } //===----------------------------------------------------------------------===// +// GFX11. 
+//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { + multiclass VOP1Only_Real_gfx11<bits<9> op> { + let IsSingle = 1 in + def _gfx11 : + VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX11>, + VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; + } + multiclass VOP1_Real_e32_gfx11<bits<9> op, string opName = NAME> { + defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); + def _e32_gfx11 : + VOP1_Real<ps, SIEncodingFamily.GFX11>, + VOP1e<op{7-0}, ps.Pfl>; + } + multiclass VOP1_Real_e32_with_name_gfx11<bits<9> op, string opName, + string asmName> { + defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); + let AsmString = asmName # ps.AsmOperands in { + defm NAME : VOP1_Real_e32_gfx11<op, opName>, + MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>; + } + } + multiclass VOP1_Real_e64_gfx11<bits<9> op> { + def _e64_gfx11 : + VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>, + VOP3e_gfx11<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; + } + multiclass VOP1_Real_dpp_gfx11<bits<9> op, string opName = NAME> { + defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); + def _dpp_gfx11 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11> { + let DecoderNamespace = "DPPGFX11"; + } + } + multiclass VOP1_Real_dpp_with_name_gfx11<bits<9> op, string opName, + string asmName> { + defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); + let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in { + defm NAME : VOP1_Real_dpp_gfx11<op, opName>, + MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>; + } + } + multiclass VOP1_Real_dpp8_gfx11<bits<9> op, string opName = NAME> { + defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); + def _dpp8_gfx11 : VOP1_DPP8<op{7-0}, ps> { + let DecoderNamespace = "DPP8GFX11"; + } + } + multiclass VOP1_Real_dpp8_with_name_gfx11<bits<9> op, string opName, + string asmName> { + defvar ps = 
!cast<VOP1_Pseudo>(opName#"_e32"); + let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in { + defm NAME : VOP1_Real_dpp8_gfx11<op, opName>, + MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>; + } + } +} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" + +multiclass VOP1_Realtriple_e64_gfx11<bits<9> op> { + defm NAME : VOP3_Realtriple_gfx11<{0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>; +} +multiclass VOP1_Realtriple_e64_with_name_gfx11<bits<9> op, string opName, + string asmName> { + defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 1, op{6-0}}, opName, + asmName>; +} + +multiclass VOP1_Real_FULL_gfx11<bits<9> op> : + VOP1_Real_e32_gfx11<op>, VOP1_Realtriple_e64_gfx11<op>, + VOP1_Real_dpp_gfx11<op>, VOP1_Real_dpp8_gfx11<op>; + +multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName, + string asmName> : + VOP1_Real_e32_with_name_gfx11<op, opName, asmName>, + VOP1_Real_dpp_with_name_gfx11<op, opName, asmName>, + VOP1_Real_dpp8_with_name_gfx11<op, opName, asmName>; + +multiclass VOP1_Real_FULL_with_name_gfx11<bits<9> op, string opName, + string asmName> : + VOP1_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>, + VOP1_Realtriple_e64_with_name_gfx11<op, opName, asmName>; + +multiclass VOP1_Real_NO_DPP_gfx11<bits<9> op> : + VOP1_Real_e32_gfx11<op>, VOP1_Real_e64_gfx11<op>; + +defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00c, + "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">; +defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00d, + "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">; +defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11<0x039, + "V_FFBH_U32", "v_clz_i32_u32">; +defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11<0x03a, + "V_FFBL_B32", "v_ctz_i32_b32">; +defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b, + "V_FFBH_I32", "v_cls_i32">; +defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>; +defm V_NOT_B16 : VOP1_Real_FULL_gfx11<0x069>; +defm V_CVT_I32_I16 : 
VOP1_Real_FULL_gfx11<0x06a>; +defm V_CVT_U32_U16 : VOP1_Real_FULL_gfx11<0x06b>; + +//===----------------------------------------------------------------------===// // GFX10. //===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { multiclass VOP1Only_Real_gfx10<bits<9> op> { def _gfx10 : VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>, @@ -567,50 +708,59 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { } } multiclass VOP1_Real_dpp_gfx10<bits<9> op> { - foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in - def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> { + foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in + def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> { let DecoderNamespace = "SDWA10"; } } multiclass VOP1_Real_dpp8_gfx10<bits<9> op> { - foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in + foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> { let DecoderNamespace = "DPP8"; } } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" multiclass VOP1_Real_gfx10<bits<9> op> : VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>, VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>, VOP1_Real_dpp8_gfx10<op>; -defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>; -defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>; -defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>; -defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>; -defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>; -defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>; -defm V_RCP_F16 : VOP1_Real_gfx10<0x054>; -defm V_SQRT_F16 
: VOP1_Real_gfx10<0x055>; -defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>; -defm V_LOG_F16 : VOP1_Real_gfx10<0x057>; -defm V_EXP_F16 : VOP1_Real_gfx10<0x058>; -defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>; -defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>; -defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>; -defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>; -defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>; -defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>; -defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>; -defm V_SIN_F16 : VOP1_Real_gfx10<0x060>; -defm V_COS_F16 : VOP1_Real_gfx10<0x061>; -defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>; -defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>; -defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>; +multiclass VOP1_Real_gfx10_FULL_gfx11<bits<9> op> : + VOP1_Real_gfx10<op>, VOP1_Real_FULL_gfx11<op>; + +multiclass VOP1_Real_gfx10_NO_DPP_gfx11<bits<9> op> : + VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>; -defm V_SWAP_B32 : VOP1Only_Real_gfx10<0x065>; -defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>; +multiclass VOP1Only_Real_gfx10_gfx11<bits<9> op> : + VOP1Only_Real_gfx10<op>, VOP1Only_Real_gfx11<op>; + +defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11<0x01b>; +defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11<0x048>; +defm V_CVT_F16_U16 : VOP1_Real_gfx10_FULL_gfx11<0x050>; +defm V_CVT_F16_I16 : VOP1_Real_gfx10_FULL_gfx11<0x051>; +defm V_CVT_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x052>; +defm V_CVT_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x053>; +defm V_RCP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x054>; +defm V_SQRT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x055>; +defm V_RSQ_F16 : VOP1_Real_gfx10_FULL_gfx11<0x056>; +defm V_LOG_F16 : VOP1_Real_gfx10_FULL_gfx11<0x057>; +defm V_EXP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x058>; +defm V_FREXP_MANT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x059>; +defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05a>; +defm V_FLOOR_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05b>; +defm V_CEIL_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05c>; +defm V_TRUNC_F16 : 
VOP1_Real_gfx10_FULL_gfx11<0x05d>; +defm V_RNDNE_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05e>; +defm V_FRACT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05f>; +defm V_SIN_F16 : VOP1_Real_gfx10_FULL_gfx11<0x060>; +defm V_COS_F16 : VOP1_Real_gfx10_FULL_gfx11<0x061>; +defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10_FULL_gfx11<0x062>; +defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x063>; +defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x064>; + +defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11<0x065>; +defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11<0x068>; //===----------------------------------------------------------------------===// // GFX7, GFX10. @@ -635,16 +785,19 @@ multiclass VOP1_Real_gfx7<bits<9> op> : multiclass VOP1_Real_gfx7_gfx10<bits<9> op> : VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>; +multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> : + VOP1_Real_gfx7_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>; + defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>; defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>; -defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>; -defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10<0x018>; -defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>; -defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>; +defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x017>; +defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x018>; +defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x019>; +defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x01a>; //===----------------------------------------------------------------------===// -// GFX6, GFX7, GFX10. +// GFX6, GFX7, GFX10, GFX11. 
//===----------------------------------------------------------------------===// let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { @@ -666,65 +819,71 @@ multiclass VOP1_Real_gfx6_gfx7<bits<9> op> : multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> : VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>; -defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>; -defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>; -defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>; -defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>; -defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>; -defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>; -defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; +multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<bits<9> op> : + VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_FULL_gfx11<op>; -defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10<0x000>; -defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x001>; -defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x003>; -defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x004>; -defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>; -defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>; -defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>; -defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>; -defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>; -defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; +multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> : + VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>; + +defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>; +defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>; +defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>; +defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>; +defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>; +defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>; +defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; + +defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x000>; +defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x001>; +defm V_CVT_I32_F64 : 
VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x003>; +defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x004>; +defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x005>; +defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x006>; +defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x007>; +defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x008>; +defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00a>; +defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00b>; defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>; -defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10<0x00e>; -defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x00f>; -defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x010>; -defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10<0x011>; -defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10<0x012>; -defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10<0x013>; -defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10<0x014>; -defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x015>; -defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x016>; -defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x020>; -defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x021>; -defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x022>; -defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x023>; -defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x024>; -defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x025>; -defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x027>; -defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02a>; -defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02b>; -defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02e>; -defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x02f>; -defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x031>; -defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x033>; -defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x034>; -defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x035>; -defm V_COS_F32 : 
VOP1_Real_gfx6_gfx7_gfx10<0x036>; -defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x037>; -defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x038>; +defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00e>; +defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x00f>; +defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x010>; +defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x011>; +defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x012>; +defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x013>; +defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x014>; +defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x015>; +defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x016>; +defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x020>; +defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x021>; +defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x022>; +defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x023>; +defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x024>; +defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x025>; +defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x027>; +defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02a>; +defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02b>; +defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02e>; +defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x02f>; +defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x031>; +defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x033>; +defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x034>; +defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x035>; +defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x036>; +defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x037>; +defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x038>; defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>; defm V_FFBL_B32 : 
VOP1_Real_gfx6_gfx7_gfx10<0x03a>; defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>; -defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>; -defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03d>; -defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>; -defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>; -defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>; +defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03c>; +defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03d>; +defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03e>; +defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x03f>; +defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x040>; defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>; -defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x042>; -defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x043>; -defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x044>; +defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x042>; +defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x043>; +defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x044>; //===----------------------------------------------------------------------===// // GFX8, GFX9 (VI). 
@@ -949,14 +1108,29 @@ multiclass VOP1_Real_gfx9 <bits<10> op> { defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; +let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in +defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; + //===----------------------------------------------------------------------===// // GFX10 //===----------------------------------------------------------------------===// -let OtherPredicates = [isGFX10Plus] in { +let OtherPredicates = [isGFX10Only] in { def : GCNPat < (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp8), (i32 DPP8Mode.FI_0)) >; -} // End OtherPredicates = [isGFX10Plus] +} // End OtherPredicates = [isGFX10Only] + +//===----------------------------------------------------------------------===// +// GFX11 +//===----------------------------------------------------------------------===// + +let OtherPredicates = [isGFX11Only] in { +def : GCNPat < + (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), + (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src, + (as_i32timm $dpp8), (i32 DPP8Mode.FI_0)) +>; +} // End OtherPredicates = [isGFX11Only] |
