diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/BUFInstructions.td')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 875 |
1 files changed, 627 insertions, 248 deletions
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index a535c8cc0918..a087323e5de7 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -35,11 +35,6 @@ class MUBUFAddr64Table <bit is_addr64, string Name> { string OpName = Name; } -class MUBUFLdsTable <bit is_lds, string Name> { - bit IsLds = is_lds; - string OpName = Name; -} - class MTBUFAddr64Table <bit is_addr64, string Name> { bit IsAddr64 = is_addr64; string OpName = Name; @@ -100,8 +95,8 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins, bits<1> sccb_value = 0; } -class MTBUF_Real <MTBUF_Pseudo ps> : - InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> { +class MTBUF_Real <MTBUF_Pseudo ps, string real_name = ps.Mnemonic> : + InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []> { let isPseudo = 0; let isCodeGenOnly = 0; @@ -136,7 +131,7 @@ class MTBUF_Real <MTBUF_Pseudo ps> : bits<3> nfmt = format{6-4}; // GFX90A+ only: instruction uses AccVGPR for data - // Bit superceedes tfe. + // Bit supersedes tfe. bits<1> acc = !if(ps.has_vdata, vdata{9}, 0); } @@ -320,7 +315,7 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins, bits<1> idxen = 0; bits<1> addr64 = 0; bits<1> lds = 0; - bits<1> has_vdata = 1; + bits<1> has_vdata = !not(lds); bits<1> has_vaddr = 1; bits<1> has_glc = 1; bits<1> has_dlc = 1; @@ -337,8 +332,8 @@ class MUBUF_Pseudo <string opName, dag outs, dag ins, bits<1> IsBufferInv = 0; } -class MUBUF_Real <MUBUF_Pseudo ps> : - InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> { +class MUBUF_Real <MUBUF_Pseudo ps, string real_name = ps.Mnemonic> : + InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []> { let isPseudo = 0; let isCodeGenOnly = 0; @@ -360,6 +355,8 @@ class MUBUF_Real <MUBUF_Pseudo ps> : let mayStore = ps.mayStore; let IsAtomicRet = ps.IsAtomicRet; let IsAtomicNoRet = ps.IsAtomicNoRet; + let VALU = ps.VALU; + let LGKM_CNT = ps.LGKM_CNT; bits<12> offset; bits<5> cpol; @@ -370,8 +367,8 @@ class MUBUF_Real <MUBUF_Pseudo ps> : bits<8> soffset; // GFX90A+ only: instruction uses AccVGPR for data - // Bit superceedes tfe. - bits<1> acc = !if(ps.has_vdata, vdata{9}, 0); + // Bit supersedes tfe. + bits<1> acc = !if(ps.has_vdata, vdata{9}, !if(ps.lds, ?, 0)); } @@ -486,16 +483,17 @@ class MUBUF_Load_Pseudo <string opName, ValueType vdata_vt, bit HasTiedDest = 0, bit isLds = 0, + bit isLdsOpc = 0, list<dag> pattern=[], // Workaround bug bz30254 int addrKindCopy = addrKind, RegisterClass vdata_rc = getVregSrcForVT<vdata_vt>.ret, RegisterOperand vdata_op = getLdStRegisterOperand<vdata_rc>.ret> : MUBUF_Pseudo<opName, - (outs vdata_op:$vdata), + !if(!or(isLds, isLdsOpc), (outs), (outs vdata_op:$vdata)), !con(getMUBUFIns<addrKindCopy, [], isLds>.ret, !if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))), - " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" # + !if(!or(isLds, isLdsOpc), " ", " $vdata, ") # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" # !if(isLds, " lds", "$tfe") # "$swz", pattern>, MUBUF_SetupAddr<addrKindCopy> { @@ -504,13 +502,16 @@ class MUBUF_Load_Pseudo <string opName, let AsmMatchConverter = !if(isLds, "cvtMubufLds", "cvtMubuf"); let Constraints = !if(HasTiedDest, "$vdata = $vdata_in", ""); + let LGKM_CNT = isLds; + let has_vdata = !not(isLdsOpc); let mayLoad = 1; - let mayStore = 0; + let mayStore = isLds; let maybeAtomic = 1; - let Uses = !if(isLds, [EXEC, M0], [EXEC]); + let Uses = !if(!or(isLds, isLdsOpc) , [EXEC, M0], [EXEC]); let has_tfe = !not(isLds); let lds = isLds; let elements = getMUBUFElements<vdata_vt>.ret; + let VALU = isLds; } class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat < @@ -563,6 +564,20 @@ multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32> { defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>; } +multiclass MUBUF_Pseudo_Loads_LDSOpc<string opName, + ValueType load_vt = i32, + bit TiedDest = 0, + bit isLds = 0, + bit isLdsOpc = 1> { + + defvar legal_load_vt = !if(!eq(!cast<string>(load_vt), !cast<string>(v3f16)), v4f16, load_vt); + + def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, legal_load_vt, TiedDest, isLds, isLdsOpc>; + def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, legal_load_vt, TiedDest, isLds, isLdsOpc>; + def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, legal_load_vt, TiedDest, isLds, isLdsOpc>; + def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, legal_load_vt, TiedDest, isLds, isLdsOpc>; +} + class MUBUF_Store_Pseudo <string opName, int addrKind, ValueType store_vt, @@ -615,7 +630,8 @@ class MUBUF_Pseudo_Store_Lds<string opName> (outs), (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz), " $srsrc, $soffset$offset lds$cpol$swz"> { - let mayLoad = 0; + let LGKM_CNT = 1; + let mayLoad = 1; let mayStore = 1; let maybeAtomic = 1; @@ -623,6 +639,7 @@ class MUBUF_Pseudo_Store_Lds<string opName> let has_vaddr = 0; let has_tfe = 0; let lds = 1; + let VALU = 1; let Uses = [EXEC, M0]; let AsmMatchConverter = "cvtMubufLds"; @@ -785,7 +802,7 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName, multiclass MUBUF_Pseudo_Atomics <string opName, RegisterClass vdataClass, ValueType vdataType, - SDPatternOperator atomic> : + SDPatternOperator atomic = null_frag> : MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType>, MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>; @@ -898,6 +915,29 @@ defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads < "buffer_load_dwordx4", v4i32 >; +defm BUFFER_LOAD_LDS_B32 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_b32", i32 +>; +defm BUFFER_LOAD_LDS_FORMAT_X : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_format_x", f32 +>; +defm BUFFER_LOAD_LDS_I8 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_i8", i32 +>; +defm BUFFER_LOAD_LDS_I16 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_i16", i32 +>; +defm BUFFER_LOAD_LDS_U8 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_u8", i32 +>; +defm BUFFER_LOAD_LDS_U16 : MUBUF_Pseudo_Loads_LDSOpc < + "buffer_load_lds_u16", i32 +>; + +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, atomic_load_16_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, atomic_load_16_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>; @@ -909,21 +949,6 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>; -// This is not described in AMD documentation, -// but 'lds' versions of these opcodes are available -// in at least GFX8+ chips. See Bug 37653. -let SubtargetPredicate = isGFX8GFX9 in { -defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx2", v2i32, 0, 1 ->; -defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx3", v3i32, 0, 1 ->; -defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads < - "buffer_load_dwordx4", v4i32, 0, 1 ->; -} - defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores < "buffer_store_byte", i32, truncstorei8_global >; @@ -943,82 +968,82 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores < "buffer_store_dwordx4", v4i32, store_global >; defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global_32 + "buffer_atomic_swap", VGPR_32, i32 >; defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics < - "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag + "buffer_atomic_cmpswap", VReg_64, v2i32 >; defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics < - "buffer_atomic_add", VGPR_32, i32, atomic_load_add_global_32 + "buffer_atomic_add", VGPR_32, i32 >; defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32 + "buffer_atomic_sub", VGPR_32, i32 >; defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin", VGPR_32, i32, atomic_load_min_global_32 + "buffer_atomic_smin", VGPR_32, i32 >; defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32 + "buffer_atomic_umin", VGPR_32, i32 >; defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax", VGPR_32, i32, atomic_load_max_global_32 + "buffer_atomic_smax", VGPR_32, i32 >; defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32 + "buffer_atomic_umax", VGPR_32, i32 >; defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics < - "buffer_atomic_and", VGPR_32, i32, atomic_load_and_global_32 + "buffer_atomic_and", VGPR_32, i32 >; defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics < - "buffer_atomic_or", VGPR_32, i32, atomic_load_or_global_32 + "buffer_atomic_or", VGPR_32, i32 >; defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32 + "buffer_atomic_xor", VGPR_32, i32 >; defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global_32 + "buffer_atomic_inc", VGPR_32, i32 >; defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global_32 + "buffer_atomic_dec", VGPR_32, i32 >; defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64 + "buffer_atomic_swap_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag + "buffer_atomic_cmpswap_x2", VReg_128, v2i64 >; defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64 + "buffer_atomic_add_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64 + "buffer_atomic_sub_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64 + "buffer_atomic_smin_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64 + "buffer_atomic_umin_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64 + "buffer_atomic_smax_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64 + "buffer_atomic_umax_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64 + "buffer_atomic_and_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64 + "buffer_atomic_or_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64 + "buffer_atomic_xor_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64 + "buffer_atomic_inc_x2", VReg_64, i64 >; defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics < - "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64 + "buffer_atomic_dec_x2", VReg_64, i64 >; let SubtargetPredicate = HasGFX10_BEncoding in @@ -1040,7 +1065,7 @@ def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; } -let SubtargetPredicate = isGFX6GFX7GFX10 in { +let SubtargetPredicate = isGFX6GFX7GFX10Plus in { defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics < "buffer_atomic_fcmpswap", VReg_64, v2f32, null_frag @@ -1051,6 +1076,11 @@ defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics < defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics < "buffer_atomic_fmax", VGPR_32, f32, null_frag >; + +} + +let SubtargetPredicate = isGFX6GFX7GFX10 in { + defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics < "buffer_atomic_fcmpswap_x2", VReg_128, v2f64, null_frag >; @@ -1109,23 +1139,25 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores < def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>; -let SubtargetPredicate = HasAtomicFaddInsts in { -defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN < +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN< "buffer_atomic_add_f32", VGPR_32, f32 >; + +let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < "buffer_atomic_pk_add_f16", VGPR_32, v2f16 >; -let OtherPredicates = [isGFX90APlus] in { -defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN < +let OtherPredicates = [HasAtomicFaddRtnInsts] in +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN< "buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_32 >; + +let OtherPredicates = [isGFX90APlus] in defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN < "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_32 >; -} -} // End SubtargetPredicate = HasAtomicFaddInsts //===----------------------------------------------------------------------===// // MTBUF Instructions @@ -1175,15 +1207,28 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", let SubtargetPredicate = isGFX90APlus in { def BUFFER_WBL2 : MUBUF_Invalidate<"buffer_wbl2"> { + let has_glc = 1; + let has_sccb = 1; + let InOperandList = (ins CPol_0:$cpol); + let AsmOperands = "$cpol"; } def BUFFER_INVL2 : MUBUF_Invalidate<"buffer_invl2"> { + let SubtargetPredicate = isGFX90AOnly; } - defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>; - defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>; - defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>; + defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_add_f64", VReg_64, f64>; + defm BUFFER_ATOMIC_MIN_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_min_f64", VReg_64, f64>; + defm BUFFER_ATOMIC_MAX_F64 : MUBUF_Pseudo_Atomics<"buffer_atomic_max_f64", VReg_64, f64>; } // End SubtargetPredicate = isGFX90APlus +def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> { + let SubtargetPredicate = isGFX940Plus; + let has_glc = 1; + let has_sccb = 1; + let InOperandList = (ins CPol_0:$cpol); + let AsmOperands = "$cpol"; +} + let SubtargetPredicate = isGFX10Plus in { def BUFFER_GL0_INV : MUBUF_Invalidate<"buffer_gl0_inv">; def BUFFER_GL1_INV : MUBUF_Invalidate<"buffer_gl1_inv">; @@ -1364,75 +1409,169 @@ defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_short, i32, "BUFFER_STORE_SHORT">; // buffer_atomic patterns //===----------------------------------------------------------------------===// -multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt, - string opcode> { +multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isIntr = 0> { + foreach RtnMode = ["ret", "noret"] in { + + defvar Op = !cast<SDPatternOperator>(OpPrefix # "_" # RtnMode + # !if(isIntr, "", "_" # vt.Size)); + defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); + + def : GCNPat< + (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), vt:$vdata_in)), + (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in, + SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset) + >; + + def : GCNPat< + (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), + vt:$vdata_in)), + (!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in, + VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset) + >; + + } // end foreach RtnMode +} + +multiclass BufferAtomicIntrPat<string OpPrefix, ValueType vt, string Inst> { + defm : BufferAtomicPat<OpPrefix, vt, Inst, /* isIntr */ 1>; +} + +multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst> { + foreach RtnMode = ["ret", "noret"] in { + + defvar Op = !cast<SDPatternOperator>("AMDGPUatomic_cmp_swap_global_" # RtnMode + # "_" # vt.Size); + defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); + + defvar OffsetResDag = (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) + getVregSrcForVT<data_vt>.ret:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset, + offset:$offset); + def : GCNPat< + (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), data_vt:$vdata_in)), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS OffsetResDag, getVregSrcForVT<data_vt>.ret)), + !if(!eq(vt, i32), sub0, sub0_sub1)), + OffsetResDag) + >; + + defvar Addr64ResDag = (!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix) + getVregSrcForVT<data_vt>.ret:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, + SCSrc_b32:$soffset, offset:$offset); + def : GCNPat< + (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), + data_vt:$vdata_in)), + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (vt (COPY_TO_REGCLASS Addr64ResDag, getVregSrcForVT<data_vt>.ret)), + !if(!eq(vt, i32), sub0, sub0_sub1)), + Addr64ResDag) + >; + + } // end foreach RtnMode +} + +foreach Ty = [i32, i64] in { + +defvar Suffix = !if(!eq(Ty, i64), "_X2", ""); + +defm : BufferAtomicPat<"atomic_swap_global", Ty, "BUFFER_ATOMIC_SWAP" # Suffix>; +defm : BufferAtomicPat<"atomic_load_add_global", Ty, "BUFFER_ATOMIC_ADD" # Suffix>; +defm : BufferAtomicPat<"atomic_load_sub_global", Ty, "BUFFER_ATOMIC_SUB" # Suffix>; +defm : BufferAtomicPat<"atomic_load_min_global", Ty, "BUFFER_ATOMIC_SMIN" # Suffix>; +defm : BufferAtomicPat<"atomic_load_umin_global", Ty, "BUFFER_ATOMIC_UMIN" # Suffix>; +defm : BufferAtomicPat<"atomic_load_max_global", Ty, "BUFFER_ATOMIC_SMAX" # Suffix>; +defm : BufferAtomicPat<"atomic_load_umax_global", Ty, "BUFFER_ATOMIC_UMAX" # Suffix>; +defm : BufferAtomicPat<"atomic_load_and_global", Ty, "BUFFER_ATOMIC_AND" # Suffix>; +defm : BufferAtomicPat<"atomic_load_or_global", Ty, "BUFFER_ATOMIC_OR" # Suffix>; +defm : BufferAtomicPat<"atomic_load_xor_global", Ty, "BUFFER_ATOMIC_XOR" # Suffix>; +defm : BufferAtomicPat<"atomic_inc_global", Ty, "BUFFER_ATOMIC_INC" # Suffix>; +defm : BufferAtomicPat<"atomic_dec_global", Ty, "BUFFER_ATOMIC_DEC" # Suffix>; + +} // end foreach Ty + +defm : BufferAtomicCmpSwapPat<i32, v2i32, "BUFFER_ATOMIC_CMPSWAP">; +defm : BufferAtomicCmpSwapPat<i64, v2i64, "BUFFER_ATOMIC_CMPSWAP_X2">; + +multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst, + list<string> RtnModes = ["ret", "noret"]> { + foreach RtnMode = RtnModes in { + + defvar Op = !cast<SDPatternOperator>(!if(!eq(RtnMode, "none"), + OpPrefix, OpPrefix # "_" # RtnMode)); + defvar InstSuffix = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")), + "_RTN", ""); + defvar CachePolicy = !if(!or(!eq(RtnMode, "none"), !eq(RtnMode, "ret")), + (set_glc $cachepolicy), (timm:$cachepolicy)); + def : GCNPat< - (vt (name vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset, + (vt (Op vt:$vdata_in, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, 0)), - (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) + (!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (set_glc $cachepolicy)) + (as_i16timm $offset), CachePolicy) >; def : GCNPat< - (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, + (vt (Op vt:$vdata_in, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, timm)), - (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in, - VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (set_glc $cachepolicy)) + (!cast<MUBUF_Pseudo>(Inst # "_IDXEN" # InstSuffix) + getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, + SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy) >; def : GCNPat< - (vt (name vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset, + (vt (Op vt:$vdata_in, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, 0)), - (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in, - VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, - (as_i16timm $offset), (set_glc $cachepolicy)) + (!cast<MUBUF_Pseudo>(Inst # "_OFFEN" # InstSuffix) + getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, + SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy) >; def : GCNPat< - (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset, + (vt (Op vt:$vdata_in, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, timm)), - (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN) + (!cast<MUBUF_Pseudo>(Inst # "_BOTHEN" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in, (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)) + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy) >; + + } // end foreach RtnMode } -defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i32, "BUFFER_ATOMIC_SWAP">; -defm : BufferAtomicPatterns<SIbuffer_atomic_swap, f32, "BUFFER_ATOMIC_SWAP">; -defm : BufferAtomicPatterns<SIbuffer_atomic_add, i32, "BUFFER_ATOMIC_ADD">; -defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i32, "BUFFER_ATOMIC_SUB">; -defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i32, "BUFFER_ATOMIC_SMIN">; -defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i32, "BUFFER_ATOMIC_UMIN">; -defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i32, "BUFFER_ATOMIC_SMAX">; -defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i32, "BUFFER_ATOMIC_UMAX">; -defm : BufferAtomicPatterns<SIbuffer_atomic_and, i32, "BUFFER_ATOMIC_AND">; -defm : BufferAtomicPatterns<SIbuffer_atomic_or, i32, "BUFFER_ATOMIC_OR">; -defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i32, "BUFFER_ATOMIC_XOR">; -defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i32, "BUFFER_ATOMIC_INC">; -defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i32, "BUFFER_ATOMIC_DEC">; -defm : BufferAtomicPatterns<SIbuffer_atomic_csub, i32, "BUFFER_ATOMIC_CSUB">; -defm : BufferAtomicPatterns<SIbuffer_atomic_swap, i64, "BUFFER_ATOMIC_SWAP_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_add, i64, "BUFFER_ATOMIC_ADD_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_sub, i64, "BUFFER_ATOMIC_SUB_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_smin, i64, "BUFFER_ATOMIC_SMIN_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_umin, i64, "BUFFER_ATOMIC_UMIN_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_smax, i64, "BUFFER_ATOMIC_SMAX_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_umax, i64, "BUFFER_ATOMIC_UMAX_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_and, i64, "BUFFER_ATOMIC_AND_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_or, i64, "BUFFER_ATOMIC_OR_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i64, "BUFFER_ATOMIC_INC_X2">; -defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i64, "BUFFER_ATOMIC_DEC_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i32, "BUFFER_ATOMIC_SWAP">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", f32, "BUFFER_ATOMIC_SWAP">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i32, "BUFFER_ATOMIC_ADD">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_sub", i32, "BUFFER_ATOMIC_SUB">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_smin", i32, "BUFFER_ATOMIC_SMIN">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_umin", i32, "BUFFER_ATOMIC_UMIN">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_smax", i32, "BUFFER_ATOMIC_SMAX">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_umax", i32, "BUFFER_ATOMIC_UMAX">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_and", i32, "BUFFER_ATOMIC_AND">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_or", i32, "BUFFER_ATOMIC_OR">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i32, "BUFFER_ATOMIC_XOR">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i32, "BUFFER_ATOMIC_INC">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i32, "BUFFER_ATOMIC_DEC">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["none"]>; +defm : SIBufferAtomicPat<"SIbuffer_atomic_swap", i64, "BUFFER_ATOMIC_SWAP_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_add", i64, "BUFFER_ATOMIC_ADD_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_sub", i64, "BUFFER_ATOMIC_SUB_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_smin", i64, "BUFFER_ATOMIC_SMIN_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_umin", i64, "BUFFER_ATOMIC_UMIN_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_smax", i64, "BUFFER_ATOMIC_SMAX_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_umax", i64, "BUFFER_ATOMIC_UMAX_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_and", i64, "BUFFER_ATOMIC_AND_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_or", i64, "BUFFER_ATOMIC_OR_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">; +defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">; +let SubtargetPredicate = isGFX6GFX7GFX10Plus in { + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">; +} let SubtargetPredicate = isGFX6GFX7GFX10 in { - defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f32, "BUFFER_ATOMIC_FMIN">; - defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f32, "BUFFER_ATOMIC_FMAX">; - defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_FMIN_X2">; - defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_FMAX_X2">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_FMIN_X2">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_FMAX_X2">; } class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag < @@ -1482,71 +1621,89 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt, >; } -let SubtargetPredicate = HasAtomicFaddInsts in { +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">; + +let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">; -} + +let SubtargetPredicate = HasAtomicFaddRtnInsts in + defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32">; let SubtargetPredicate = isGFX90APlus in { - defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, f32, "BUFFER_ATOMIC_ADD_F32">; - defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, v2f16, "BUFFER_ATOMIC_PK_ADD_F16">; + defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; + defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; + defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16">; - defm : BufferAtomicPatterns<SIbuffer_atomic_fadd, f64, "BUFFER_ATOMIC_ADD_F64">; - defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_MIN_F64">; - defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_MAX_F64">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">; + defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">; } // End SubtargetPredicate = isGFX90APlus +foreach RtnMode = ["ret", "noret"] in { + +defvar Op = !cast<SDPatternOperator>(SIbuffer_atomic_cmpswap # "_" # RtnMode); +defvar InstSuffix = !if(!eq(RtnMode, "ret"), "_RTN", ""); +defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy), + (timm:$cachepolicy)); + +defvar OffsetResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_OFFSET" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy); def : GCNPat< - (SIbuffer_atomic_cmpswap + (Op i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, 0), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS - (BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)), VReg_64)), sub0) + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffsetResDag, VReg_64)), sub0), + OffsetResDag) >; +defvar IdxenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_IDXEN" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), + CachePolicy); def : GCNPat< - (SIbuffer_atomic_cmpswap + (Op i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, timm:$cachepolicy, timm), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS - (BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)), VReg_64)), - sub0) + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS IdxenResDag, VReg_64)), sub0), + IdxenResDag) >; +defvar OffenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_OFFEN" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), + CachePolicy); def : GCNPat< - (SIbuffer_atomic_cmpswap + (Op i32:$data, i32:$cmp, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, 0), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS - (BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)), VReg_64)), - sub0) + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS OffenResDag, VReg_64)), sub0), + OffenResDag) >; +defvar BothenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_BOTHEN" # InstSuffix) + (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), + (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), + SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy); def : GCNPat< - (SIbuffer_atomic_cmpswap + (Op i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, timm:$cachepolicy, timm), - (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS - (BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN - (REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1), - (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1), - SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), - (set_glc $cachepolicy)), VReg_64)), - sub0) + !if(!eq(RtnMode, "ret"), + (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS BothenResDag, VReg_64)), sub0), + BothenResDag) >; +} // end foreach RtnMode + class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt, PatFrag constant_ld> : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, @@ -1682,8 +1839,12 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In >; } let SubtargetPredicate = isGFX6GFX7 in { -defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, atomic_store_global_32>; -defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, atomic_store_global_64>; +defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_BYTE_ADDR64, BUFFER_STORE_BYTE_OFFSET, i32, atomic_store_8_global>; +defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_BYTE_ADDR64, BUFFER_STORE_BYTE_OFFSET, i16, atomic_store_8_global>; +defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_SHORT_ADDR64, BUFFER_STORE_SHORT_OFFSET, i32, atomic_store_16_global>; +defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_SHORT_ADDR64, BUFFER_STORE_SHORT_OFFSET, i16, atomic_store_16_global>; +defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, atomic_store_32_global>; +defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, atomic_store_64_global>; } // End Predicates = isGFX6GFX7 @@ -1731,7 +1892,7 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX3_OFFEN, BUFFER_STORE_DWORDX3_OF defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private, VReg_128>; -let OtherPredicates = [D16PreservesUnusedBits, DisableFlatScratch] in { +let OtherPredicates = [HasD16LoadStore, DisableFlatScratch] in { // Hiding the extract high pattern in the PatFrag seems to not // automatically increase the complexity. let AddedComplexity = 1 in { @@ -1882,24 +2043,41 @@ let SubtargetPredicate = HasPackedD16VMem in { //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Base ENC_MUBUF for GFX6, GFX7, GFX10. +// Base ENC_MUBUF for GFX6, GFX7, GFX10, GFX11. //===----------------------------------------------------------------------===// -class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> : - MUBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> { +class Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11 <MUBUF_Pseudo ps, int ef, + string real_name = ps.Mnemonic> : + MUBUF_Real<ps, real_name>, Enc64, SIMCInstr<ps.PseudoInstr, ef> { let Inst{11-0} = !if(ps.has_offset, offset, ?); + let Inst{31-26} = 0x38; + let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); + let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); + let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); +} + +class MUBUF_Real_gfx11<bits<8> op, MUBUF_Pseudo ps, + string real_name = ps.Mnemonic> : + Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, SIEncodingFamily.GFX11, real_name> { + let Inst{12} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); + let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); + let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); + let Inst{25-18} = op; + let Inst{53} = !if(ps.has_tfe, tfe, ?); + let Inst{54} = ps.offen; + let Inst{55} = ps.idxen; +} + +class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> : + Base_MUBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef> { let Inst{12} = ps.offen; let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); let Inst{16} = ps.lds; let Inst{24-18} = op; - let Inst{31-26} = 0x38; - let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); - let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); - let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); - let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> : @@ -1914,10 +2092,155 @@ class MUBUF_Real_gfx6_gfx7<bits<8> op, MUBUF_Pseudo ps> : } //===----------------------------------------------------------------------===// +// MUBUF - GFX11. +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in +multiclass MUBUF_Real_AllAddr_gfx11_Renamed_Impl<bits<8> op, string real_name> { + def _BOTHEN_gfx11 : + MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN"), real_name>, + AtomicNoRet<NAME # "_BOTHEN_gfx11", 0>; + def _IDXEN_gfx11 : + MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN"), real_name>, + AtomicNoRet<NAME # "_IDXEN_gfx11", 0>; + def _OFFEN_gfx11 : + MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN"), real_name>, + AtomicNoRet<NAME # "_OFFEN_gfx11", 0>; + def _OFFSET_gfx11 : + MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET"), real_name>, + AtomicNoRet<NAME # "_OFFSET_gfx11", 0>; +} + +multiclass MUBUF_Real_AllAddr_gfx11_Impl<bits<8> op, MUBUF_Pseudo ps> : + MUBUF_Real_AllAddr_gfx11_Renamed_Impl<op, ps.Mnemonic>; +multiclass MUBUF_Real_AllAddr_gfx11<bits<8> op> : + MUBUF_Real_AllAddr_gfx11_Impl<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; + +class Pre_gfx11_MUBUF_Name <MUBUF_Pseudo ps, string real_name> : + MnemonicAlias<ps.Mnemonic, real_name>, Requires<[isGFX11Plus]>; +multiclass MUBUF_Real_AllAddr_gfx11_Renamed<bits<8> op, string real_name> : + MUBUF_Real_AllAddr_gfx11_Renamed_Impl<op, real_name> { + def : Pre_gfx11_MUBUF_Name<!cast<MUBUF_Pseudo>(NAME#"_BOTHEN"), real_name>; +} + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in +multiclass MUBUF_Real_Atomics_RTN_gfx11_Renamed<bits<8> op, string real_name> { + def _BOTHEN_RTN_gfx11 : + MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN"), real_name>, + AtomicNoRet<NAME # "_BOTHEN_gfx11", 1>; + def _IDXEN_RTN_gfx11 : + MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN"), real_name>, + AtomicNoRet<NAME # "_IDXEN_gfx11", 1>; + def _OFFEN_RTN_gfx11 : + MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN"), real_name>, + AtomicNoRet<NAME # "_OFFEN_gfx11", 1>; + def _OFFSET_RTN_gfx11 : + MUBUF_Real_gfx11<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN"), real_name>, + AtomicNoRet<NAME # "_OFFSET_gfx11", 1>; +} + +multiclass MUBUF_Real_Atomics_RTN_gfx11_impl<bits<8> op, MUBUF_Pseudo ps> : + MUBUF_Real_Atomics_RTN_gfx11_Renamed<op, ps.Mnemonic>; +multiclass MUBUF_Real_Atomics_RTN_gfx11<bits<8> op> : + MUBUF_Real_Atomics_RTN_gfx11_impl<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; + +multiclass MUBUF_Real_Atomics_gfx11<bits<8> op> : + MUBUF_Real_AllAddr_gfx11<op>, + MUBUF_Real_Atomics_RTN_gfx11<op>; + +multiclass MUBUF_Real_Atomics_gfx11_Renamed<bits<8> op, string real_name> : + MUBUF_Real_AllAddr_gfx11_Renamed<op, real_name>, + MUBUF_Real_Atomics_RTN_gfx11_Renamed<op, real_name>; + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { +def BUFFER_GL0_INV_gfx11 : MUBUF_Real_gfx11<0x02B, BUFFER_GL0_INV>; +def BUFFER_GL1_INV_gfx11 : MUBUF_Real_gfx11<0x02C, BUFFER_GL1_INV>; +} + +defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_gfx11_Renamed<0x014, "buffer_load_b32">; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_gfx11_Renamed<0x015, "buffer_load_b64">; +defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_gfx11_Renamed<0x016, "buffer_load_b96">; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_gfx11_Renamed<0x017, "buffer_load_b128">; +defm BUFFER_LOAD_SHORT_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x020, "buffer_load_d16_b16">; +defm BUFFER_LOAD_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x008, "buffer_load_d16_format_x">; +defm BUFFER_LOAD_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_Renamed<0x009, "buffer_load_d16_format_xy">; +defm BUFFER_LOAD_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_Renamed<0x00a, "buffer_load_d16_format_xyz">; +defm BUFFER_LOAD_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_Renamed<0x00b, "buffer_load_d16_format_xyzw">; +defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x023, "buffer_load_d16_hi_b16">; +defm BUFFER_LOAD_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x026, "buffer_load_d16_hi_format_x">; +defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x022, "buffer_load_d16_hi_i8">; +defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x021, "buffer_load_d16_hi_u8">; +defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01f, "buffer_load_d16_i8">; +defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01e, "buffer_load_d16_u8">; +defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x000>; +defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_gfx11<0x001>; +defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11<0x002>; +defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11<0x003>; +defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x011, "buffer_load_i8">; +defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x013, "buffer_load_i16">; +defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x010, "buffer_load_u8">; +defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x012, "buffer_load_u16">; +defm BUFFER_LOAD_LDS_B32 : MUBUF_Real_AllAddr_gfx11<0x031>; +defm BUFFER_LOAD_LDS_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x032>; +defm BUFFER_LOAD_LDS_I8 : MUBUF_Real_AllAddr_gfx11<0x02e>; +defm BUFFER_LOAD_LDS_I16 : MUBUF_Real_AllAddr_gfx11<0x030>; +defm BUFFER_LOAD_LDS_U8 : MUBUF_Real_AllAddr_gfx11<0x02d>; +defm BUFFER_LOAD_LDS_U16 : MUBUF_Real_AllAddr_gfx11<0x02f>; +defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_gfx11_Renamed<0x018, "buffer_store_b8">; +defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_gfx11_Renamed<0x019, "buffer_store_b16">; +defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_gfx11_Renamed<0x01A, "buffer_store_b32">; +defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01B, "buffer_store_b64">; +defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01C, "buffer_store_b96">; +defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_gfx11_Renamed<0x01D, "buffer_store_b128">; +defm BUFFER_STORE_FORMAT_D16_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x00C, "buffer_store_d16_format_x">; +defm BUFFER_STORE_FORMAT_D16_XY : MUBUF_Real_AllAddr_gfx11_Renamed<0x00D, "buffer_store_d16_format_xy">; +defm BUFFER_STORE_FORMAT_D16_XYZ : MUBUF_Real_AllAddr_gfx11_Renamed<0x00E, "buffer_store_d16_format_xyz">; +defm BUFFER_STORE_FORMAT_D16_XYZW : MUBUF_Real_AllAddr_gfx11_Renamed<0x00F, "buffer_store_d16_format_xyzw">; +defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x024, "buffer_store_d16_hi_b8">; +defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx11_Renamed<0x025, "buffer_store_d16_hi_b16">; +defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Real_AllAddr_gfx11_Renamed<0x027, "buffer_store_d16_hi_format_x">; +defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_gfx11<0x004>; +defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_gfx11<0x005>; +defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_gfx11<0x006>; +defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_gfx11<0x007>; +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomics_gfx11<0x056>; +defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomics_gfx11_Renamed<0x035, "buffer_atomic_add_u32">; +defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x043, "buffer_atomic_add_u64">; +defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomics_gfx11_Renamed<0x03C, "buffer_atomic_and_b32">; +defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x049, "buffer_atomic_and_b64">; +defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomics_gfx11_Renamed<0x034, "buffer_atomic_cmpswap_b32">; +defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x042, "buffer_atomic_cmpswap_b64">; +defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomics_gfx11_Renamed<0x050, "buffer_atomic_cmpswap_f32">; +defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_RTN_gfx11_Renamed<0x037, "buffer_atomic_csub_u32">; +def : MnemonicAlias<"buffer_atomic_csub", "buffer_atomic_csub_u32">, Requires<[isGFX11Plus]>; +defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomics_gfx11_Renamed<0x040, "buffer_atomic_dec_u32">; +defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04D, "buffer_atomic_dec_u64">; +defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomics_gfx11_Renamed<0x03F, "buffer_atomic_inc_u32">; +defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04C, "buffer_atomic_inc_u64">; +defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomics_gfx11_Renamed<0x052, "buffer_atomic_max_f32">; +defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomics_gfx11_Renamed<0x03A, "buffer_atomic_max_i32">; +defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x047, "buffer_atomic_max_i64">; +defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomics_gfx11_Renamed<0x03B, "buffer_atomic_max_u32">; +defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x048, "buffer_atomic_max_u64">; +defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomics_gfx11_Renamed<0x051, "buffer_atomic_min_f32">; +defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomics_gfx11_Renamed<0x038, "buffer_atomic_min_i32">; +defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x045, "buffer_atomic_min_i64">; +defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomics_gfx11_Renamed<0x039, "buffer_atomic_min_u32">; +defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x046, "buffer_atomic_min_u64">; +defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomics_gfx11_Renamed<0x03D, "buffer_atomic_or_b32">; +defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04A, "buffer_atomic_or_b64">; +defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomics_gfx11_Renamed<0x036, "buffer_atomic_sub_u32">; +defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x044, "buffer_atomic_sub_u64">; +defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomics_gfx11_Renamed<0x033, "buffer_atomic_swap_b32">; +defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x041, "buffer_atomic_swap_b64">; +defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomics_gfx11_Renamed<0x03E, "buffer_atomic_xor_b32">; +defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomics_gfx11_Renamed<0x04B, "buffer_atomic_xor_b64">; + +//===----------------------------------------------------------------------===// // MUBUF - GFX10. //===----------------------------------------------------------------------===// -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { multiclass MUBUF_Real_AllAddr_gfx10<bits<8> op> { def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; @@ -1929,23 +2252,15 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; } multiclass MUBUF_Real_AllAddr_Lds_gfx10<bits<8> op> { - def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, - MUBUFLdsTable<0, NAME # "_OFFSET_gfx10">; - def _OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, - MUBUFLdsTable<0, NAME # "_OFFEN_gfx10">; - def _IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, - MUBUFLdsTable<0, NAME # "_IDXEN_gfx10">; - def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, - MUBUFLdsTable<0, NAME # "_BOTHEN_gfx10">; + def _OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; + def _OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; + def _IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; + def _BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; - def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, - MUBUFLdsTable<1, NAME # "_OFFSET_gfx10">; - def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, - MUBUFLdsTable<1, NAME # "_OFFEN_gfx10">; - def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, - MUBUFLdsTable<1, NAME # "_IDXEN_gfx10">; - def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, - MUBUFLdsTable<1, NAME # "_BOTHEN_gfx10">; + def _LDS_OFFSET_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>; + def _LDS_OFFEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>; + def _LDS_IDXEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>; + def _LDS_BOTHEN_gfx10 : MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>; } multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> { def _BOTHEN_RTN_gfx10 : @@ -1976,7 +2291,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, AtomicNoRet<NAME # "_OFFSET_gfx10", 0>; } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>; defm BUFFER_STORE_SHORT_D16_HI : MUBUF_Real_AllAddr_gfx10<0x01b>; @@ -2033,27 +2348,17 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; } multiclass MUBUF_Real_AllAddr_Lds_gfx6_gfx7<bits<8> op> { - def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, - MUBUFLdsTable<0, NAME # "_OFFSET_gfx6_gfx7">; - def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>, - MUBUFLdsTable<0, NAME # "_ADDR64_gfx6_gfx7">; - def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, - MUBUFLdsTable<0, NAME # "_OFFEN_gfx6_gfx7">; - def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, - MUBUFLdsTable<0, NAME # "_IDXEN_gfx6_gfx7">; - def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, - MUBUFLdsTable<0, NAME # "_BOTHEN_gfx6_gfx7">; + def _OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; + def _ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>; + def _OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; + def _IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; + def _BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; - def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, - MUBUFLdsTable<1, NAME # "_OFFSET_gfx6_gfx7">; - def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>, - MUBUFLdsTable<1, NAME # "_ADDR64_gfx6_gfx7">; - def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, - MUBUFLdsTable<1, NAME # "_OFFEN_gfx6_gfx7">; - def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, - MUBUFLdsTable<1, NAME # "_IDXEN_gfx6_gfx7">; - def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, - MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">; + def _LDS_OFFSET_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>; + def _LDS_ADDR64_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_ADDR64")>; + def _LDS_OFFEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>; + def _LDS_IDXEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>; + def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>; } multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> { def _ADDR64_gfx6_gfx7 : @@ -2167,26 +2472,89 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>; def BUFFER_WBINVL1_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<0x071, BUFFER_WBINVL1>; //===----------------------------------------------------------------------===// -// Base ENC_MTBUF for GFX6, GFX7, GFX10. +// Base ENC_MTBUF for GFX6, GFX7, GFX10, GFX11. //===----------------------------------------------------------------------===// -class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> : - MTBUF_Real<ps>, Enc64, SIMCInstr<ps.PseudoInstr, ef> { +class Base_MTBUF_Real_gfx6_gfx7_gfx10_gfx11<MTBUF_Pseudo ps, int ef, + string real_name = ps.Mnemonic> : + MTBUF_Real<ps, real_name>, Enc64, SIMCInstr<ps.PseudoInstr, ef> { let Inst{11-0} = !if(ps.has_offset, offset, ?); - let Inst{12} = ps.offen; - let Inst{13} = ps.idxen; let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value); - let Inst{18-16} = op; let Inst{31-26} = 0x3a; //encoding let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?); let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?); + let Inst{63-56} = !if(ps.has_soffset, soffset, ?); +} + +class Base_MTBUF_Real_gfx11<bits<4> op, MTBUF_Pseudo ps, + string real_name = ps.Mnemonic> : + Base_MTBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, SIEncodingFamily.GFX11, real_name> { + let Inst{12} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); + let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value); + let Inst{18-15} = op; + let Inst{25-19} = format; + let Inst{53} = !if(ps.has_tfe, tfe, ?); + let Inst{54} = ps.offen; + let Inst{55} = ps.idxen; +} + +class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> : + Base_MTBUF_Real_gfx6_gfx7_gfx10_gfx11<ps, ef> { + let Inst{12} = ps.offen; + let Inst{13} = ps.idxen; + let Inst{18-16} = op; let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?); let Inst{55} = !if(ps.has_tfe, tfe, ?); - let Inst{63-56} = !if(ps.has_soffset, soffset, ?); } //===----------------------------------------------------------------------===// +// MTBUF - GFX11. +//===----------------------------------------------------------------------===// + +let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in +multiclass MTBUF_Real_AllAddr_gfx11_Renamed_Impl<bits<4> op, string real_name> { + def _BOTHEN_gfx11 : + Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN"), real_name>; + def _IDXEN_gfx11 : + Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_IDXEN"), real_name>; + def _OFFEN_gfx11 : + Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_OFFEN"), real_name>; + def _OFFSET_gfx11 : + Base_MTBUF_Real_gfx11<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET"), real_name>; +} + +multiclass MTBUF_Real_AllAddr_gfx11_Impl<bits<4> op, MTBUF_Pseudo ps> + : MTBUF_Real_AllAddr_gfx11_Renamed_Impl<op, ps.Mnemonic>; +multiclass MTBUF_Real_AllAddr_gfx11<bits<4> op> + : MTBUF_Real_AllAddr_gfx11_Impl<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; + + +class Pre_gfx11_MTBUF_Name <MTBUF_Pseudo ps, string real_name> + : MnemonicAlias<ps.Mnemonic, real_name>, Requires<[isGFX11Plus]>; +multiclass MTBUF_Real_AllAddr_gfx11_Renamed<bits<4> op, string real_name> + : MTBUF_Real_AllAddr_gfx11_Renamed_Impl<op, real_name> { + def : Pre_gfx11_MTBUF_Name<!cast<MTBUF_Pseudo>(NAME#"_BOTHEN"), real_name>; +} + +defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_Renamed<0x008, "tbuffer_load_d16_format_x">; +defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_Renamed<0x009, "tbuffer_load_d16_format_xy">; +defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_Renamed<0x00a, "tbuffer_load_d16_format_xyz">; +defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_Renamed<0x00b, "tbuffer_load_d16_format_xyzw">; +defm TBUFFER_LOAD_FORMAT_X : MTBUF_Real_AllAddr_gfx11<0x000>; +defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Real_AllAddr_gfx11<0x001>; +defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11<0x002>; +defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11<0x003>; +defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx11_Renamed<0x00c, "tbuffer_store_d16_format_x">; +defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx11_Renamed<0x00d, "tbuffer_store_d16_format_xy">; +defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Real_AllAddr_gfx11_Renamed<0x00e, "tbuffer_store_d16_format_xyz">; +defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Real_AllAddr_gfx11_Renamed<0x00f, "tbuffer_store_d16_format_xyzw">; +defm TBUFFER_STORE_FORMAT_X : MTBUF_Real_AllAddr_gfx11<0x004>; +defm TBUFFER_STORE_FORMAT_XY : MTBUF_Real_AllAddr_gfx11<0x005>; +defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Real_AllAddr_gfx11<0x006>; +defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx11<0x007>; + +//===----------------------------------------------------------------------===// // MTBUF - GFX10. //===----------------------------------------------------------------------===// @@ -2197,7 +2565,7 @@ class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> : let Inst{53} = op{3}; } -let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { +let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { multiclass MTBUF_Real_AllAddr_gfx10<bits<4> op> { def _BOTHEN_gfx10 : MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_BOTHEN")>; @@ -2208,7 +2576,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { def _OFFSET_gfx10 : MTBUF_Real_gfx10<op, !cast<MTBUF_Pseudo>(NAME#"_OFFSET")>; } -} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" +} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Real_AllAddr_gfx10<0x008>; defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Real_AllAddr_gfx10<0x009>; @@ -2303,9 +2671,28 @@ class MUBUF_Real_gfx90a <bits<7> op, MUBUF_Pseudo ps, let Inst{55} = acc; } +class MUBUF_Real_gfx940 <bits<7> op, MUBUF_Pseudo ps> : + MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX940> { + let AssemblerPredicate = isGFX940Plus; + let DecoderNamespace = "GFX9"; + let AsmString = ps.Mnemonic # !subst("$tfe", "", ps.AsmOperands); + + let Inst{55} = acc; +} + multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> { def _vi : MUBUF_Real_vi<op, ps>; - def _gfx90a : MUBUF_Real_gfx90a<op, ps, !and(ps.has_sccb,!not(ps.FPAtomic))>; + + foreach _ = BoolToList<!not(ps.FPAtomic)>.ret in + def _gfx90a : MUBUF_Real_gfx90a<op, ps>; + + foreach _ = BoolToList<ps.FPAtomic>.ret in { + def _gfx90a : MUBUF_Real_gfx90a<op, ps, 0> { + let SubtargetPredicate = isGFX90AOnly; + let AssemblerPredicate = isGFX90AOnly; + } + def _gfx940 : MUBUF_Real_gfx940<op, ps>; + } } multiclass MUBUF_Real_AllAddr_vi<bits<7> op> { @@ -2317,41 +2704,25 @@ multiclass MUBUF_Real_AllAddr_vi<bits<7> op> { multiclass MUBUF_Real_AllAddr_Lds_vi<bits<7> op> { - def _OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, - MUBUFLdsTable<0, NAME # "_OFFSET_vi">; - def _OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, - MUBUFLdsTable<0, NAME # "_OFFEN_vi">; - def _IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, - MUBUFLdsTable<0, NAME # "_IDXEN_vi">; - def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, - MUBUFLdsTable<0, NAME # "_BOTHEN_vi">; + def _OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; + def _OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; + def _IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; + def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; - def _LDS_OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, - MUBUFLdsTable<1, NAME # "_OFFSET_vi">; - def _LDS_OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, - MUBUFLdsTable<1, NAME # "_OFFEN_vi">; - def _LDS_IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, - MUBUFLdsTable<1, NAME # "_IDXEN_vi">; - def _LDS_BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, - MUBUFLdsTable<1, NAME # "_BOTHEN_vi">; + def _LDS_OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>; + def _LDS_OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>; + def _LDS_IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>; + def _LDS_BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>; - def _OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>, - MUBUFLdsTable<0, NAME # "_OFFSET_gfx90a">; - def _OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>, - MUBUFLdsTable<0, NAME # "_OFFEN_gfx90a">; - def _IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>, - MUBUFLdsTable<0, NAME # "_IDXEN_gfx90a">; - def _BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>, - MUBUFLdsTable<0, NAME # "_BOTHEN_gfx90a">; + def _OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>; + def _OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>; + def _IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>; + def _BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>; - def _LDS_OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>, - MUBUFLdsTable<1, NAME # "_OFFSET_gfx90a">; - def _LDS_OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>, - MUBUFLdsTable<1, NAME # "_OFFEN_gfx90a">; - def _LDS_IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>, - MUBUFLdsTable<1, NAME # "_IDXEN_gfx90a">; - def _LDS_BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>, - MUBUFLdsTable<1, NAME # "_BOTHEN_gfx90a">; + def _LDS_OFFSET_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFSET")>; + def _LDS_OFFEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_OFFEN")>; + def _LDS_IDXEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_IDXEN")>; + def _LDS_BOTHEN_gfx90a : MUBUF_Real_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>; } class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> : @@ -2424,9 +2795,9 @@ defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_Lds_vi <0x11>; defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_Lds_vi <0x12>; defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_Lds_vi <0x13>; defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_Lds_vi <0x14>; -defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_Lds_vi <0x15>; -defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_Lds_vi <0x16>; -defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_Lds_vi <0x17>; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_vi <0x15>; +defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_vi <0x16>; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_vi <0x17>; defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_vi <0x18>; defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_vi <0x19>; defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_vi <0x1a>; @@ -2481,12 +2852,12 @@ def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>; def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>; } // End AssemblerPredicate = isGFX8GFX9 -let SubtargetPredicate = HasAtomicFaddInsts in { +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in { defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>; defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; -} // End SubtargetPredicate = HasAtomicFaddInsts +} // End SubtargetPredicate = HasAtomicFaddNoRtnInsts let SubtargetPredicate = isGFX90APlus in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>; @@ -2495,9 +2866,17 @@ let SubtargetPredicate = isGFX90APlus in { } // End SubtargetPredicate = isGFX90APlus, AssemblerPredicate = isGFX90APlus def BUFFER_WBL2_gfx90a : MUBUF_Real_gfx90a<0x28, BUFFER_WBL2> { + let AsmString = BUFFER_WBL2.Mnemonic; // drop flags + let AssemblerPredicate = isGFX90AOnly; + let SubtargetPredicate = isGFX90AOnly; } def BUFFER_INVL2_gfx90a : MUBUF_Real_gfx90a<0x29, BUFFER_INVL2>; +let SubtargetPredicate = isGFX940Plus in { +def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>; +def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>; +} + class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> : MTBUF_Real<ps>, Enc64, |
