diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.td')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 625 |
1 files changed, 495 insertions, 130 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 713a08907e99..29ee9f12b12d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===// +//===-- SIInstrInfo.td -----------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -17,7 +17,8 @@ class GCNPredicateControl : PredicateControl { } // Except for the NONE field, this must be kept in sync with the -// SIEncodingFamily enum in AMDGPUInstrInfo.cpp +// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the +// getMCOpcodeGen table. def SIEncodingFamily { int NONE = -1; int SI = 0; @@ -29,6 +30,8 @@ def SIEncodingFamily { int GFX10 = 6; int SDWA10 = 7; int GFX90A = 8; + int GFX940 = 9; + int GFX11 = 10; } //===----------------------------------------------------------------------===// @@ -190,6 +193,44 @@ def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">; def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">; def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; +multiclass SDBufferAtomicRetNoRet { + def "_ret" : PatFrag< + (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, + node:$offset, node:$cachepolicy, node:$idxen), + (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex, + node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, + node:$idxen)> { + let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }]; + let GISelPredicateCode = [{ return true; }]; + } + + def "_noret" : PatFrag< + (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, + node:$offset, node:$cachepolicy, node:$idxen), + (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, 
node:$vindex, + node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, + node:$idxen)> { + let PredicateCode = [{ return SDValue(N, 0).use_empty(); }]; + let GISelPredicateCode = [{ return false; }]; + } +} + +defm SIbuffer_atomic_swap : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_add : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_sub : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_smin : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_umin : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_smax : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_umax : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_and : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_or : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_xor : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_inc : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_dec : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_fadd : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_fmin : SDBufferAtomicRetNoRet; +defm SIbuffer_atomic_fmax : SDBufferAtomicRetNoRet; + def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", SDTypeProfile<1, 9, [SDTCisVT<0, i32>, // dst @@ -205,6 +246,26 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] >; +def SIbuffer_atomic_cmpswap_ret : PatFrag< + (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, + node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), + (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex, + node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, + node:$idxen)> { + let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }]; + let GISelPredicateCode = [{ return true; }]; +} + +def SIbuffer_atomic_cmpswap_noret : PatFrag< + (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, + node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), + (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex, + 
node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, + node:$idxen)> { + let PredicateCode = [{ return SDValue(N, 0).use_empty(); }]; + let GISelPredicateCode = [{ return false; }]; +} + class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode, SDTypeProfile<0, 2, [SDTCisPtrTy<0>, // vaddr @@ -255,35 +316,57 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue] >; +def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD", + SDTFPRoundOp +>; + +def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD", + SDTFPRoundOp +>; + //===----------------------------------------------------------------------===// // ValueType helpers //===----------------------------------------------------------------------===// // Returns 1 if the source arguments have modifiers, 0 if they do not. -// XXX - do f16 instructions? class isFloatType<ValueType SrcVT> { bit ret = !or(!eq(SrcVT.Value, f16.Value), !eq(SrcVT.Value, f32.Value), !eq(SrcVT.Value, f64.Value), !eq(SrcVT.Value, v2f16.Value), !eq(SrcVT.Value, v4f16.Value), + !eq(SrcVT.Value, v8f16.Value), + !eq(SrcVT.Value, v16f16.Value), !eq(SrcVT.Value, v2f32.Value), + !eq(SrcVT.Value, v4f32.Value), + !eq(SrcVT.Value, v8f32.Value), !eq(SrcVT.Value, v2f64.Value), !eq(SrcVT.Value, v4f64.Value)); } +// XXX - do v2i16 instructions? 
class isIntType<ValueType SrcVT> { bit ret = !or(!eq(SrcVT.Value, i16.Value), !eq(SrcVT.Value, i32.Value), !eq(SrcVT.Value, i64.Value), - !eq(SrcVT.Value, v2i32.Value)); + !eq(SrcVT.Value, v4i16.Value), + !eq(SrcVT.Value, v8i16.Value), + !eq(SrcVT.Value, v16i16.Value), + !eq(SrcVT.Value, v2i32.Value), + !eq(SrcVT.Value, v4i32.Value), + !eq(SrcVT.Value, v8i32.Value)); } class isPackedType<ValueType SrcVT> { bit ret = !or(!eq(SrcVT.Value, v2i16.Value), !eq(SrcVT.Value, v2f16.Value), !eq(SrcVT.Value, v4f16.Value), - !eq(SrcVT.Value, v2f32.Value)); + !eq(SrcVT.Value, v2i32.Value), + !eq(SrcVT.Value, v2f32.Value), + !eq(SrcVT.Value, v4i32.Value), + !eq(SrcVT.Value, v4f32.Value), + !eq(SrcVT.Value, v8i32.Value), + !eq(SrcVT.Value, v8f32.Value)); } @@ -291,19 +374,10 @@ class isPackedType<ValueType SrcVT> { // PatFrags for global memory operations //===----------------------------------------------------------------------===// -foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { -let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in { - - -defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>; -defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>; -defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>; -defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>; - - -} // End let AddressSpaces = ... -} // End foreach AddrSpace - +defm atomic_inc : binary_atomic_op_all_as<SIatomic_inc>; +defm atomic_dec : binary_atomic_op_all_as<SIatomic_dec>; +defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>; +defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>; //===----------------------------------------------------------------------===// // SDNodes PatFrags for loads/stores with a glue input. 
@@ -408,50 +482,36 @@ def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { let IsNonExtLoad = 1; } -let MemoryVT = i8 in { def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>; def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>; def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>; -} -let MemoryVT = i16 in { def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>; def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>; def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>; -} +} // End IsLoad = 1, , AddressSpaces = LoadAddress_local.AddrSpaces def load_align8_local_m0 : PatFrag<(ops node:$ptr), - (load_local_m0 node:$ptr)>, Aligned<8> { + (load_local_m0 node:$ptr)> { let IsLoad = 1; - let IsNonExtLoad = 1; + int MinAlignment = 8; } def load_align16_local_m0 : PatFrag<(ops node:$ptr), - (load_local_m0 node:$ptr)>, Aligned<16> { + (load_local_m0 node:$ptr)> { let IsLoad = 1; - let IsNonExtLoad = 1; + int MinAlignment = 16; } -} // End IsLoad = 1 - let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr), - (atomic_load_8_glue node:$ptr)> { - let MemoryVT = i8; -} + (atomic_load_8_glue node:$ptr)>; def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr), - (atomic_load_16_glue node:$ptr)> { - let MemoryVT = i16; -} + (atomic_load_16_glue node:$ptr)>; def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr), - (atomic_load_32_glue node:$ptr)> { - let MemoryVT = i32; -} + (atomic_load_32_glue node:$ptr)>; def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr), - (atomic_load_64_glue node:$ptr)> { - let MemoryVT = i64; -} - + (atomic_load_64_glue node:$ptr)>; } // End let AddressSpaces = LoadAddress_local.AddrSpaces @@ -485,75 +545,103 @@ def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr), (truncstore_glue node:$val, node:$ptr)> { 
let IsStore = 1; let MemoryVT = i8; + let IsTruncStore = 1; } def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr), (truncstore_glue node:$val, node:$ptr)> { let IsStore = 1; let MemoryVT = i16; + let IsTruncStore = 1; } let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { def store_local_m0 : PatFrag<(ops node:$val, node:$ptr), - (store_glue node:$val, node:$ptr)> { - let IsStore = 1; - let IsTruncStore = 0; -} - + (store_glue node:$val, node:$ptr)>; def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr)> { - let IsStore = 1; - let MemoryVT = i8; -} - + (truncstorei8_glue node:$val, node:$ptr)>; def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore_glue node:$val, node:$ptr)> { - let IsStore = 1; - let MemoryVT = i16; -} + (truncstorei16_glue node:$val, node:$ptr)>; } def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr), (store_local_m0 node:$value, node:$ptr)>, Aligned<8> { let IsStore = 1; - let IsTruncStore = 0; } def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr), (store_local_m0 node:$value, node:$ptr)>, Aligned<16> { let IsStore = 1; +} + +let PredicateCode = [{return cast<MemSDNode>(N)->getAlignment() < 4;}], + GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}], + AddressSpaces = [ AddrSpaces.Local ] in { +def load_align_less_than_4_local : PatFrag<(ops node:$ptr), + (load_local node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; +} + +def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr), + (load_local_m0 node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; +} + +def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr), + (store_local node:$value, node:$ptr)> { + let IsStore = 1; let IsTruncStore = 0; } -let AddressSpaces = StoreAddress_local.AddrSpaces in { +def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr), + (store_local_m0 node:$value, 
node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} +} -def atomic_store_local_8_m0 : PatFrag < - (ops node:$value, node:$ptr), - (AMDGPUatomic_st_glue node:$value, node:$ptr)> { +def atomic_store_8_glue : PatFrag < + (ops node:$ptr, node:$value), + (AMDGPUatomic_st_glue node:$ptr, node:$value)> { let IsAtomic = 1; let MemoryVT = i8; } -def atomic_store_local_16_m0 : PatFrag < - (ops node:$value, node:$ptr), - (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + +def atomic_store_16_glue : PatFrag < + (ops node:$ptr, node:$value), + (AMDGPUatomic_st_glue node:$ptr, node:$value)> { let IsAtomic = 1; let MemoryVT = i16; } -def atomic_store_local_32_m0 : PatFrag < - (ops node:$value, node:$ptr), - (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + +def atomic_store_32_glue : PatFrag < + (ops node:$ptr, node:$value), + (AMDGPUatomic_st_glue node:$ptr, node:$value)> { let IsAtomic = 1; let MemoryVT = i32; } -def atomic_store_local_64_m0 : PatFrag < - (ops node:$value, node:$ptr), - (AMDGPUatomic_st_glue node:$value, node:$ptr)> { + +def atomic_store_64_glue : PatFrag < + (ops node:$ptr, node:$value), + (AMDGPUatomic_st_glue node:$ptr, node:$value)> { let IsAtomic = 1; let MemoryVT = i64; } -} // End let AddressSpaces = StoreAddress_local.AddrSpaces + +let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { +def atomic_store_8_local_m0 : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_8_glue node:$ptr, node:$val)>; +def atomic_store_16_local_m0 : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_16_glue node:$ptr, node:$val)>; +def atomic_store_32_local_m0 : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_32_glue node:$ptr, node:$val)>; +def atomic_store_64_local_m0 : PatFrag<(ops node:$ptr, node:$val), + (atomic_store_64_glue node:$ptr, node:$val)>; +} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces def si_setcc_uniform : PatFrag < @@ -686,10 +774,14 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0, let 
AddressSpaces = StoreAddress_local.AddrSpaces in { defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; + defm _local_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"), + IsInt>; } let AddressSpaces = StoreAddress_region.AddrSpaces in { defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; + defm _region_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"), + IsInt>; } } @@ -954,6 +1046,18 @@ def SWaitMatchClass : AsmOperandClass { let ParserMethod = "parseSWaitCntOps"; } +def DepCtrMatchClass : AsmOperandClass { + let Name = "DepCtr"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "parseDepCtrOps"; +} + +def SDelayMatchClass : AsmOperandClass { + let Name = "SDelayAlu"; + let RenderMethod = "addImmOperands"; + let ParserMethod = "parseSDelayAluOps"; +} + def VReg32OrOffClass : AsmOperandClass { let Name = "VReg32OrOff"; let ParserMethod = "parseVReg32OrOff"; @@ -979,6 +1083,16 @@ def WAIT_FLAG : Operand <i32> { let ParserMatchClass = SWaitMatchClass; let PrintMethod = "printWaitFlag"; } + +def DepCtrImm : Operand <i32> { + let ParserMatchClass = DepCtrMatchClass; + let PrintMethod = "printDepCtr"; +} + +def DELAY_FLAG : Operand <i32> { + let ParserMatchClass = SDelayMatchClass; + let PrintMethod = "printDelayFlag"; +} } // End OperandType = "OPERAND_IMMEDIATE" include "SIInstrFormats.td" @@ -1163,14 +1277,6 @@ def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT", 0>>; def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>; def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>; -def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>; - -def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>; -def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>; -def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>; -def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>; -def FI : NamedOperandU32<"FI", 
NamedMatchClass<"FI">>; - def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>; def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>; def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>; @@ -1181,6 +1287,14 @@ def op_sel_hi0 : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>; def neg_lo0 : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>; def neg_hi0 : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>; +def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>; +def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>; + +def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>; +def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>; +def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>; +def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>; + def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>; def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>; def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>; @@ -1191,6 +1305,9 @@ def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> { } +def wait_vdst : NamedOperandU8<"WaitVDST", NamedMatchClass<"WaitVDST">>; +def wait_exp : NamedOperandU8<"WaitEXP", NamedMatchClass<"WaitEXP">>; + } // End OperandType = "OPERAND_IMMEDIATE" class KImmMatchClass<int size> : AsmOperandClass { @@ -1223,10 +1340,18 @@ class FPInputModsMatchClass <int opSize> : AsmOperandClass { let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods"; } +class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> { + let Name = "RegOrInlineImmWithFP"#opSize#"InputMods"; + let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods"; +} + def FP16InputModsMatchClass : FPInputModsMatchClass<16>; def FP32InputModsMatchClass : FPInputModsMatchClass<32>; def FP64InputModsMatchClass : FPInputModsMatchClass<64>; +def 
FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>; +def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>; + class InputMods <AsmOperandClass matchClass> : Operand <i32> { let OperandNamespace = "AMDGPU"; let OperandType = "OPERAND_INPUT_MODS"; @@ -1241,19 +1366,28 @@ def FP16InputMods : FPInputMods<FP16InputModsMatchClass>; def FP32InputMods : FPInputMods<FP32InputModsMatchClass>; def FP64InputMods : FPInputMods<FP64InputModsMatchClass>; +def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>; +def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>; + class IntInputModsMatchClass <int opSize> : AsmOperandClass { let Name = "RegOrImmWithInt"#opSize#"InputMods"; let ParserMethod = "parseRegOrImmWithIntInputMods"; let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods"; } +class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> { + let Name = "RegOrInlineImmWithInt"#opSize#"InputMods"; + let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods"; +} def Int32InputModsMatchClass : IntInputModsMatchClass<32>; def Int64InputModsMatchClass : IntInputModsMatchClass<64>; +def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>; class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> { let PrintMethod = "printOperandAndIntInputMods"; } def Int32InputMods : IntInputMods<Int32InputModsMatchClass>; def Int64InputMods : IntInputMods<Int64InputModsMatchClass>; +def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>; class OpSelModsMatchClass : AsmOperandClass { let Name = "OpSelMods"; @@ -1366,12 +1500,19 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">; def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">; +def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">; +def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">; +def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, 
"SelectWMMAOpSelVOP3PMods">; + def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">; def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">; def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">; +def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">; +def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">; + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// @@ -1575,6 +1716,19 @@ class getVOP3SrcForVT<ValueType VT> { ); } +// Src2 of VOP3 DPP instructions cannot be a literal +class getVOP3DPPSrcForVT<ValueType VT> { + bit isFP = isFloatType<VT>.ret; + RegisterOperand ret = + !if (!eq(VT.Value, i1.Value), SSrc_i1, + !if (isFP, + !if (!eq(VT.Value, f16.Value), VCSrc_f16, + !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)), + !if (!eq(VT.Value, i16.Value), VCSrc_b16, + !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16, + VCSrc_b32)))); +} + // Float or packed int class isModifierType<ValueType SrcVT> { bit ret = !or(!eq(SrcVT.Value, f16.Value), @@ -1583,7 +1737,17 @@ class isModifierType<ValueType SrcVT> { !eq(SrcVT.Value, v2f16.Value), !eq(SrcVT.Value, v2i16.Value), !eq(SrcVT.Value, v2f32.Value), - !eq(SrcVT.Value, v2i32.Value)); + !eq(SrcVT.Value, v2i32.Value), + !eq(SrcVT.Value, v4f16.Value), + !eq(SrcVT.Value, v4i16.Value), + !eq(SrcVT.Value, v4f32.Value), + !eq(SrcVT.Value, v4i32.Value), + !eq(SrcVT.Value, v8f16.Value), + !eq(SrcVT.Value, v8i16.Value), + !eq(SrcVT.Value, v8f32.Value), + !eq(SrcVT.Value, v8i32.Value), + !eq(SrcVT.Value, v16f16.Value), + !eq(SrcVT.Value, v16i16.Value)); } // Return type of input modifiers operand for specified input operand @@ -1611,6 +1775,17 @@ class getSrcModDPP <ValueType VT> { Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } +// Return type of input modifiers operand for specified input operand for DPP 
+class getSrcModVOP3DPP <ValueType VT, bit EnableF32SrcMods> { + bit isFP = isFloatType<VT>.ret; + bit isPacked = isPackedType<VT>.ret; + Operand ret = + !if (isFP, + !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods, + FP32VCSrcInputMods), + !if (EnableF32SrcMods, FP32VCSrcInputMods, Int32VCSrcInputMods)); +} + // Return type of input modifiers operand specified input operand for SDWA class getSrcModSDWA <ValueType VT> { Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods, @@ -1620,7 +1795,7 @@ class getSrcModSDWA <ValueType VT> { } // Returns the input arguments for VOP[12C] instructions for the given SrcVT. -class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> { +class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> { dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2 (ins))); @@ -1715,19 +1890,21 @@ class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; dag opsel = (ins op_sel0:$op_sel); - dag vop3pFields = (ins op_sel_hi0:$op_sel_hi, neg_lo0:$neg_lo, neg_hi0:$neg_hi); + dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); + dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi)); + dag ret = !con(base, !if(HasOpSel, opsel,(ins)), !if(IsVOP3P, vop3pFields,(ins))); } class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC, - RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, + RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel, Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, - 1/*HasOpSel*/, 1/*IsVOP3P*/>.ret; + HasOpSel, 1/*IsVOP3P*/>.ret; } class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, @@ -1741,8 
+1918,8 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, } class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, - int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod> { + RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { dag ret = !if (!eq(NumSrcArgs, 0), // VOP1 without input operands (V_NOP) @@ -1756,6 +1933,7 @@ class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass // VOP1_DPP without modifiers (ins OldRC:$old, Src0RC:$src0) /* endif */), + !if (!eq(NumSrcArgs, 2), !if (HasModifiers, // VOP2_DPP with modifiers (ins OldRC:$old, @@ -1765,34 +1943,72 @@ class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass // VOP2_DPP without modifiers (ins OldRC:$old, Src0RC:$src0, Src1RC:$src1) - ))); + ) + /* NumSrcArgs == 3, VOP3 */, + !if (HasModifiers, + // VOP3_DPP with modifiers + (ins OldRC:$old, + Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + Src2Mod:$src2_modifiers, Src2RC:$src2) + /* else */, + // VOP3_DPP without modifiers + (ins OldRC:$old, + Src0RC:$src0, Src1RC:$src1, + Src2RC:$src2) + ) + /* endif */))); } class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, - int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod> { - dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs, - HasModifiers, Src0Mod, Src1Mod>.ret, + RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, + HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, - bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); } class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, - int NumSrcArgs, 
bit HasModifiers, - Operand Src0Mod, Operand Src1Mod> { - dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, NumSrcArgs, - HasModifiers, Src0Mod, Src1Mod>.ret, + RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, + HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, (ins FI:$fi)); } class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, - int NumSrcArgs, bit HasModifiers, - Operand Src0Mod, Operand Src1Mod> { - dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs, - HasModifiers, Src0Mod, Src1Mod>.ret, + RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { + dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, + HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret, (ins dpp8:$dpp8, FI:$fi)); } +class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { + dag old = ( ins OldRC:$old ); + dag base = VOP3Base; + dag ret = !con( + !if(!ne(NumSrcArgs, 0), old, (ins)), + base + ); +} + +class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { + dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret, + (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); +} + +class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { + dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs>.ret, + (ins FI:$fi)); +} + +class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> { + dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret, + (ins dpp8:$dpp8, FI:$fi)); +} // Ins for SDWA class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs, @@ -1870,6 +2086,15 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); } +class getAsmVOPDPart <int 
NumSrcArgs, string XorY> { + string dst = "$vdst" # XorY; + string src0 = ", $src0" # XorY; + string src1 = ", $vsrc1" # XorY; + string ret = dst # + !if(!ge(NumSrcArgs, 1), src0, "") # + !if(!ge(NumSrcArgs, 2), src1, ""); +} + // Returns the assembly string for the inputs and outputs of a VOP3 // instruction. class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers, @@ -1890,7 +2115,7 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers, // Returns the assembly string for the inputs and outputs of a VOP3P // instruction. class getAsmVOP3P <int NumSrcArgs, bit HasModifiers, - bit HasClamp> { + bit HasClamp, bit HasOpSel> { string dst = "$vdst"; string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); string src1 = !if(!eq(NumSrcArgs, 1), "", @@ -1900,10 +2125,11 @@ class getAsmVOP3P <int NumSrcArgs, bit HasModifiers, string mods = !if(HasModifiers, "$neg_lo$neg_hi", ""); string clamp = !if(HasClamp, "$clamp", ""); + string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", ""); // Each modifier is printed as an array of bits for each operand, so // all operands are printed as part of src0_modifiers. 
- string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp; + string ret = dst#", "#src0#src1#src2#opsel#mods#clamp; } class getAsmVOP3OpSel <int NumSrcArgs, @@ -1930,8 +2156,8 @@ class getAsmVOP3OpSel <int NumSrcArgs, string src2 = !if(Src2HasMods, fsrc2, isrc2); string clamp = !if(HasClamp, "$clamp", ""); - - string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp; + string omod = ""; + string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod; } class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> { @@ -1955,15 +2181,63 @@ class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT } class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> - : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> { + : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{ let ret = dst#args#" $dpp8$fi"; } +class getAsmVOP3DPPBase <int NumSrcArgs, bit HasDst, bit HasClamp, + bit HasOpSel, bit HasOMod, bit IsVOP3P, + bit HasModifiers, bit Src0HasMods, + bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> { + string dst = !if(HasDst, + !if(!eq(DstVT.Size, 1), + "$sdst", + "$vdst"), + ""); // use $sdst for VOPC + string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); + string isrc1 = !if(!eq(NumSrcArgs, 1), "", + !if(!eq(NumSrcArgs, 2), " $src1", + " $src1,")); + string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", ""); + + string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); + string fsrc1 = !if(!eq(NumSrcArgs, 1), "", + !if(!eq(NumSrcArgs, 2), " $src1_modifiers", + " $src1_modifiers,")); + string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); + + string src0 = !if(Src0HasMods, fsrc0, isrc0); + string src1 = !if(Src1HasMods, fsrc1, isrc1); + string src2 = !if(Src2HasMods, fsrc2, isrc2); + string opsel = !if(HasOpSel, "$op_sel", ""); + string 3PMods = !if(IsVOP3P, + !if(HasOpSel, "$op_sel_hi", "") + #!if(HasModifiers, "$neg_lo$neg_hi", ""), + ""); + string 
clamp = !if(HasClamp, "$clamp", ""); + string omod = !if(HasOMod, "$omod", ""); + + string ret = dst#", "#src0#src1#src2#opsel#3PMods#clamp#omod; + +} + +class getAsmVOP3DPP<string base> { + string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; +} + +class getAsmVOP3DPP16<string base> { + string ret = getAsmVOP3DPP<base>.ret # "$fi"; +} + +class getAsmVOP3DPP8<string base> { + string ret = base # " $dpp8$fi"; +} + class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { string dst = !if(HasDst, !if(!eq(DstVT.Size, 1), - " vcc", // use vcc token as dst for VOPC instructioins + " vcc", // use vcc token as dst for VOPC instructions "$vdst"), ""); string src0 = "$src0_modifiers"; @@ -2056,6 +2330,12 @@ class getHasDPP <int NumSrcArgs> { 1); } +class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32, + ValueType Src1VT = i32> { + bit ret = !and(getHasDPP<NumSrcArgs>.ret, + !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret)); +} + class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32, ValueType Src1VT = i32> { bit ret = !and(getHasDPP<NumSrcArgs>.ret, @@ -2089,6 +2369,24 @@ class BitAnd<bit a, bit b> { bit ret = !if(a, !if(b, 1, 0), 0); } +class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32, + ValueType Src1VT = i32, ValueType Src2VT = i32> { + bit ret = !if(!eq(DstVT.Size, 64), + 0, // 64-bit dst No DPP for 64-bit operands + !if(!eq(Src0VT.Size, 64), + 0, // 64-bit src0 + !if(!eq(Src1VT.Size, 64), + 0, // 64-bit src1 + !if(!eq(Src2VT.Size, 64), + 0, // 64-bit src2 + 1 + ) + ) + ) + ); +} + + def PatGenMode { int NoPattern = 0; int Pattern = 1; @@ -2106,15 +2404,20 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, field ValueType Src1VT = ArgVT[2]; field ValueType Src2VT = ArgVT[3]; field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret; + field RegisterOperand DstRC64 = DstRC; field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret; 
field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret; - field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret; + field RegisterOperand Src1RC32 = RegisterOperand<getVregSrcForVT<Src1VT>.ret>; field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret; field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret; field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret; field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret; field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret; + field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret; + field RegisterOperand Src0VOP3DPP = VGPRSrc_32; + field RegisterOperand Src1VOP3DPP = VGPRSrc_32; + field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret; field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret; field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret; field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret; @@ -2122,6 +2425,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret; field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret; field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret; + field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret; + field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, EnableF32SrcMods>.ret; field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret; field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret; @@ -2169,15 +2474,20 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0); field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; - field bit HasExtDPP = getHasDPP<NumSrcArgs>.ret; + field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret; + field bit HasExtDPP = !if(!or(getHasDPP<NumSrcArgs>.ret, + HasExtVOP3DPP), 1, 0); + field bit HasExt32BitDPP = 
getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; field bit HasExtSDWA9 = HasExtSDWA; field int NeedPatGen = PatGenMode.NoPattern; field bit IsMAI = 0; + field bit IsVOP3P = 0; field bit IsDOT = 0; field bit IsSingle = 0; + field bit IsWMMA = 0; field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); @@ -2188,9 +2498,11 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, // VOP3b instructions are a special case with a second explicit // output. This is manually overridden for them. field dag Outs32 = Outs; - field dag Outs64 = Outs; + field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs)); field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret; field dag OutsDPP8 = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret; + field dag OutsVOP3DPP = OutsDPP; + field dag OutsVOP3DPP8 = OutsDPP8; field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret; field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret; @@ -2198,7 +2510,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, HasIntClamp, HasModifiers, HasSrc2Mods, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64, - NumSrcArgs, HasClamp, + NumSrcArgs, HasClamp, HasOpSel, Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret; field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, HasClamp, HasOMod, @@ -2206,21 +2518,35 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, getOpSelMod<Src1VT>.ret, getOpSelMod<Src2VT>.ret>.ret; field dag InsDPP = !if(HasExtDPP, - getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, - HasModifiers, Src0ModDPP, Src1ModDPP>.ret, + getInsDPP<DstRCDPP, Src0DPP, 
Src1DPP, Src2DPP, NumSrcArgs, + HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret, (ins)); - field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, - HasModifiers, Src0ModDPP, Src1ModDPP>.ret; - field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0, - Src0ModDPP, Src1ModDPP>.ret; + field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, + HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; + field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, + NumSrcArgs, HasModifiers, + Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; + field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, + Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, + Src0ModDPP, Src1ModDPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret; + field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret; + field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret; + field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret; field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, HasSDWAOMod, Src0ModSDWA, Src1ModSDWA, DstVT>.ret; + field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X); + // It is a slight misnomer to use the deferred f32 operand type for non-float + // operands, but this operand type will only be used if the other dual + // component is FMAAK or FMAMK + field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X); + field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y); + field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y); field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret; field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret; - field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp>.ret; + field string AsmVOP3P = 
getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret; field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, HasSrc0FloatMods, @@ -2232,15 +2558,24 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0, // DPP8 encoding has no fields for modifiers, and it is enforced by setting // the asm operand name via this HasModifiers flag field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret; + field string AsmVOP3DPPBase = getAsmVOP3DPPBase<NumSrcArgs, HasDst, HasClamp, + HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasSrc0FloatMods, HasSrc1FloatMods, + HasSrc2FloatMods, DstVT >.ret; + field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3DPPBase>.ret; + field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3DPPBase>.ret; + field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret; field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret; field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret; - + field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret; + field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret; field string TieRegDPP = "$old"; } -class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> { + class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> { let HasExt = 0; let HasExtDPP = 0; + let HasExtVOP3DPP = 0; + let HasExt32BitDPP = 0; let HasExt64BitDPP = 0; let HasExtSDWA = 0; let HasExtSDWA9 = 0; @@ -2249,10 +2584,10 @@ class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> { class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> { let NeedPatGen = mode; } - def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; +def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; @@ -2264,6 
+2599,7 @@ def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>; +def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>; def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>; def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>; @@ -2274,6 +2610,10 @@ def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>; def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>; def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>; +def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>; +def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>; +def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>; + def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>; def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; @@ -2343,6 +2683,18 @@ def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>; def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>; def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>; +def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>; +def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>; +def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>; +def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>; + +def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>; +def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>; +def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>; +def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>; +def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>; +def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile 
<[v16i32, v2i32, v4i32, i32]>; + class Commutable_REV <string revOp, bit isOrig> { string RevOp = revOp; bit IsOrig = isOrig; @@ -2394,10 +2746,11 @@ multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm, def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>; - let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { + let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>; - } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" + } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" } + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// @@ -2470,6 +2823,7 @@ def getMCOpcodeGen : InstrMapping { let RowFields = ["PseudoInstr"]; let ColFields = ["Subtarget"]; let KeyCol = [!cast<string>(SIEncodingFamily.NONE)]; + // These columns must be kept in sync with the SIEncodingFamily enumeration. let ValueCols = [[!cast<string>(SIEncodingFamily.SI)], [!cast<string>(SIEncodingFamily.VI)], [!cast<string>(SIEncodingFamily.SDWA)], @@ -2482,7 +2836,9 @@ def getMCOpcodeGen : InstrMapping { [!cast<string>(SIEncodingFamily.GFX9)], [!cast<string>(SIEncodingFamily.GFX10)], [!cast<string>(SIEncodingFamily.SDWA10)], - [!cast<string>(SIEncodingFamily.GFX90A)]]; + [!cast<string>(SIEncodingFamily.GFX90A)], + [!cast<string>(SIEncodingFamily.GFX940)], + [!cast<string>(SIEncodingFamily.GFX11)]]; } // Get equivalent SOPK instruction. @@ -2510,14 +2866,6 @@ def getIfAddr64Inst : InstrMapping { let ValueCols = [["1"]]; } -def getMUBUFNoLdsInst : InstrMapping { - let FilterClass = "MUBUFLdsTable"; - let RowFields = ["OpName"]; - let ColFields = ["IsLds"]; - let KeyCol = ["1"]; - let ValueCols = [["0"]]; -} - // Maps an atomic opcode to its returnless version. 
def getAtomicNoRetOp : InstrMapping { let FilterClass = "AtomicNoRet"; @@ -2580,6 +2928,14 @@ def getFlatScratchInstSSfromSV : InstrMapping { let ValueCols = [["SS"]]; } +def getFlatScratchInstSVfromSVS : InstrMapping { + let FilterClass = "FlatScratchInst"; + let RowFields = ["SVOp"]; + let ColFields = ["Mode"]; + let KeyCol = ["SVS"]; + let ValueCols = [["SV"]]; +} + def getFlatScratchInstSVfromSS : InstrMapping { let FilterClass = "FlatScratchInst"; let RowFields = ["SVOp"]; @@ -2596,6 +2952,15 @@ def getMFMAEarlyClobberOp : InstrMapping { let ValueCols = [["0"]]; } +// Maps a v_cmp instruction to its v_cmpx equivalent. +def getVCMPXOpFromVCMP : InstrMapping { + let FilterClass = "VCMPVCMPXTable"; + let RowFields = ["VCMPOp"]; + let ColFields = ["IsVCMPX"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + include "SIInstructions.td" include "DSInstructions.td" |
