aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIInstrInfo.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.td')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td625
1 files changed, 495 insertions, 130 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 713a08907e99..29ee9f12b12d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
+//===-- SIInstrInfo.td -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,7 +17,8 @@ class GCNPredicateControl : PredicateControl {
}
// Except for the NONE field, this must be kept in sync with the
-// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
+// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
+// getMCOpcodeGen table.
def SIEncodingFamily {
int NONE = -1;
int SI = 0;
@@ -29,6 +30,8 @@ def SIEncodingFamily {
int GFX10 = 6;
int SDWA10 = 7;
int GFX90A = 8;
+ int GFX940 = 9;
+ int GFX11 = 10;
}
//===----------------------------------------------------------------------===//
@@ -190,6 +193,44 @@ def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
+multiclass SDBufferAtomicRetNoRet {
+ def "_ret" : PatFrag<
+ (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
+ node:$offset, node:$cachepolicy, node:$idxen),
+ (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
+ node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
+ node:$idxen)> {
+ let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }];
+ let GISelPredicateCode = [{ return true; }];
+ }
+
+ def "_noret" : PatFrag<
+ (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
+ node:$offset, node:$cachepolicy, node:$idxen),
+ (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
+ node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
+ node:$idxen)> {
+ let PredicateCode = [{ return SDValue(N, 0).use_empty(); }];
+ let GISelPredicateCode = [{ return false; }];
+ }
+}
+
+defm SIbuffer_atomic_swap : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_add : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_sub : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_smin : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_umin : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_smax : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_umax : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_and : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_or : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_xor : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_inc : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_dec : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_fadd : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_fmin : SDBufferAtomicRetNoRet;
+defm SIbuffer_atomic_fmax : SDBufferAtomicRetNoRet;
+
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
SDTypeProfile<1, 9,
[SDTCisVT<0, i32>, // dst
@@ -205,6 +246,26 @@ def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
+def SIbuffer_atomic_cmpswap_ret : PatFrag<
+ (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
+ node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
+ (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
+ node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
+ node:$idxen)> {
+ let PredicateCode = [{ return !(SDValue(N, 0).use_empty()); }];
+ let GISelPredicateCode = [{ return true; }];
+}
+
+def SIbuffer_atomic_cmpswap_noret : PatFrag<
+ (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
+ node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
+ (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
+ node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
+ node:$idxen)> {
+ let PredicateCode = [{ return SDValue(N, 0).use_empty(); }];
+ let GISelPredicateCode = [{ return false; }];
+}
+
class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
SDTypeProfile<0, 2,
[SDTCisPtrTy<0>, // vaddr
@@ -255,35 +316,57 @@ def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;
+def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
+ SDTFPRoundOp
+>;
+
+def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
+ SDTFPRoundOp
+>;
+
//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//
// Returns 1 if the source arguments have modifiers, 0 if they do not.
-// XXX - do f16 instructions?
class isFloatType<ValueType SrcVT> {
bit ret = !or(!eq(SrcVT.Value, f16.Value),
!eq(SrcVT.Value, f32.Value),
!eq(SrcVT.Value, f64.Value),
!eq(SrcVT.Value, v2f16.Value),
!eq(SrcVT.Value, v4f16.Value),
+ !eq(SrcVT.Value, v8f16.Value),
+ !eq(SrcVT.Value, v16f16.Value),
!eq(SrcVT.Value, v2f32.Value),
+ !eq(SrcVT.Value, v4f32.Value),
+ !eq(SrcVT.Value, v8f32.Value),
!eq(SrcVT.Value, v2f64.Value),
!eq(SrcVT.Value, v4f64.Value));
}
+// XXX - do v2i16 instructions?
class isIntType<ValueType SrcVT> {
bit ret = !or(!eq(SrcVT.Value, i16.Value),
!eq(SrcVT.Value, i32.Value),
!eq(SrcVT.Value, i64.Value),
- !eq(SrcVT.Value, v2i32.Value));
+ !eq(SrcVT.Value, v4i16.Value),
+ !eq(SrcVT.Value, v8i16.Value),
+ !eq(SrcVT.Value, v16i16.Value),
+ !eq(SrcVT.Value, v2i32.Value),
+ !eq(SrcVT.Value, v4i32.Value),
+ !eq(SrcVT.Value, v8i32.Value));
}
class isPackedType<ValueType SrcVT> {
bit ret = !or(!eq(SrcVT.Value, v2i16.Value),
!eq(SrcVT.Value, v2f16.Value),
!eq(SrcVT.Value, v4f16.Value),
- !eq(SrcVT.Value, v2f32.Value));
+ !eq(SrcVT.Value, v2i32.Value),
+ !eq(SrcVT.Value, v2f32.Value),
+ !eq(SrcVT.Value, v4i32.Value),
+ !eq(SrcVT.Value, v4f32.Value),
+ !eq(SrcVT.Value, v8i32.Value),
+ !eq(SrcVT.Value, v8f32.Value));
}
@@ -291,19 +374,10 @@ class isPackedType<ValueType SrcVT> {
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
-foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
-let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
-
-
-defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>;
-defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>;
-defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>;
-defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;
-
-
-} // End let AddressSpaces = ...
-} // End foreach AddrSpace
-
+defm atomic_inc : binary_atomic_op_all_as<SIatomic_inc>;
+defm atomic_dec : binary_atomic_op_all_as<SIatomic_dec>;
+defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>;
+defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>;
//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
@@ -408,50 +482,36 @@ def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
let IsNonExtLoad = 1;
}
-let MemoryVT = i8 in {
def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
-}
-let MemoryVT = i16 in {
def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
-}
+} // End IsLoad = 1, , AddressSpaces = LoadAddress_local.AddrSpaces
def load_align8_local_m0 : PatFrag<(ops node:$ptr),
- (load_local_m0 node:$ptr)>, Aligned<8> {
+ (load_local_m0 node:$ptr)> {
let IsLoad = 1;
- let IsNonExtLoad = 1;
+ int MinAlignment = 8;
}
def load_align16_local_m0 : PatFrag<(ops node:$ptr),
- (load_local_m0 node:$ptr)>, Aligned<16> {
+ (load_local_m0 node:$ptr)> {
let IsLoad = 1;
- let IsNonExtLoad = 1;
+ int MinAlignment = 16;
}
-} // End IsLoad = 1
-
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
- (atomic_load_8_glue node:$ptr)> {
- let MemoryVT = i8;
-}
+ (atomic_load_8_glue node:$ptr)>;
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
- (atomic_load_16_glue node:$ptr)> {
- let MemoryVT = i16;
-}
+ (atomic_load_16_glue node:$ptr)>;
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
- (atomic_load_32_glue node:$ptr)> {
- let MemoryVT = i32;
-}
+ (atomic_load_32_glue node:$ptr)>;
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
- (atomic_load_64_glue node:$ptr)> {
- let MemoryVT = i64;
-}
-
+ (atomic_load_64_glue node:$ptr)>;
} // End let AddressSpaces = LoadAddress_local.AddrSpaces
@@ -485,75 +545,103 @@ def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
(truncstore_glue node:$val, node:$ptr)> {
let IsStore = 1;
let MemoryVT = i8;
+ let IsTruncStore = 1;
}
def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
(truncstore_glue node:$val, node:$ptr)> {
let IsStore = 1;
let MemoryVT = i16;
+ let IsTruncStore = 1;
}
let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
- (store_glue node:$val, node:$ptr)> {
- let IsStore = 1;
- let IsTruncStore = 0;
-}
-
+ (store_glue node:$val, node:$ptr)>;
def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
- (unindexedstore_glue node:$val, node:$ptr)> {
- let IsStore = 1;
- let MemoryVT = i8;
-}
-
+ (truncstorei8_glue node:$val, node:$ptr)>;
def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
- (unindexedstore_glue node:$val, node:$ptr)> {
- let IsStore = 1;
- let MemoryVT = i16;
-}
+ (truncstorei16_glue node:$val, node:$ptr)>;
}
def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
(store_local_m0 node:$value, node:$ptr)>,
Aligned<8> {
let IsStore = 1;
- let IsTruncStore = 0;
}
def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
(store_local_m0 node:$value, node:$ptr)>,
Aligned<16> {
let IsStore = 1;
+}
+
+let PredicateCode = [{return cast<MemSDNode>(N)->getAlignment() < 4;}],
+ GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
+ AddressSpaces = [ AddrSpaces.Local ] in {
+def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
+ (load_local node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+}
+
+def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
+ (load_local_m0 node:$ptr)> {
+ let IsLoad = 1;
+ let IsNonExtLoad = 1;
+}
+
+def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
+ (store_local node:$value, node:$ptr)> {
+ let IsStore = 1;
let IsTruncStore = 0;
}
-let AddressSpaces = StoreAddress_local.AddrSpaces in {
+def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
+ (store_local_m0 node:$value, node:$ptr)> {
+ let IsStore = 1;
+ let IsTruncStore = 0;
+}
+}
-def atomic_store_local_8_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+def atomic_store_8_glue : PatFrag <
+ (ops node:$ptr, node:$value),
+ (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
let IsAtomic = 1;
let MemoryVT = i8;
}
-def atomic_store_local_16_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+
+def atomic_store_16_glue : PatFrag <
+ (ops node:$ptr, node:$value),
+ (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
let IsAtomic = 1;
let MemoryVT = i16;
}
-def atomic_store_local_32_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+
+def atomic_store_32_glue : PatFrag <
+ (ops node:$ptr, node:$value),
+ (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
let IsAtomic = 1;
let MemoryVT = i32;
}
-def atomic_store_local_64_m0 : PatFrag <
- (ops node:$value, node:$ptr),
- (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+
+def atomic_store_64_glue : PatFrag <
+ (ops node:$ptr, node:$value),
+ (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
let IsAtomic = 1;
let MemoryVT = i64;
}
-} // End let AddressSpaces = StoreAddress_local.AddrSpaces
+
+let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
+def atomic_store_8_local_m0 : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_8_glue node:$ptr, node:$val)>;
+def atomic_store_16_local_m0 : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_16_glue node:$ptr, node:$val)>;
+def atomic_store_32_local_m0 : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_32_glue node:$ptr, node:$val)>;
+def atomic_store_64_local_m0 : PatFrag<(ops node:$ptr, node:$val),
+ (atomic_store_64_glue node:$ptr, node:$val)>;
+} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces
def si_setcc_uniform : PatFrag <
@@ -686,10 +774,14 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
+ defm _local_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
+ IsInt>;
}
let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
+ defm _region_m0 : ret_noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
+ IsInt>;
}
}
@@ -954,6 +1046,18 @@ def SWaitMatchClass : AsmOperandClass {
let ParserMethod = "parseSWaitCntOps";
}
+def DepCtrMatchClass : AsmOperandClass {
+ let Name = "DepCtr";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseDepCtrOps";
+}
+
+def SDelayMatchClass : AsmOperandClass {
+ let Name = "SDelayAlu";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseSDelayAluOps";
+}
+
def VReg32OrOffClass : AsmOperandClass {
let Name = "VReg32OrOff";
let ParserMethod = "parseVReg32OrOff";
@@ -979,6 +1083,16 @@ def WAIT_FLAG : Operand <i32> {
let ParserMatchClass = SWaitMatchClass;
let PrintMethod = "printWaitFlag";
}
+
+def DepCtrImm : Operand <i32> {
+ let ParserMatchClass = DepCtrMatchClass;
+ let PrintMethod = "printDepCtr";
+}
+
+def DELAY_FLAG : Operand <i32> {
+ let ParserMatchClass = SDelayMatchClass;
+ let PrintMethod = "printDelayFlag";
+}
} // End OperandType = "OPERAND_IMMEDIATE"
include "SIInstrFormats.td"
@@ -1163,14 +1277,6 @@ def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT", 0>>;
def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;
-def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
-
-def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
-def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
-def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
-def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
-def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
-
def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
@@ -1181,6 +1287,14 @@ def op_sel_hi0 : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
def neg_lo0 : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
def neg_hi0 : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
+def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
+def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
+
+def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
+def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
+def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
+def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
+
def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
@@ -1191,6 +1305,9 @@ def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
}
+def wait_vdst : NamedOperandU8<"WaitVDST", NamedMatchClass<"WaitVDST">>;
+def wait_exp : NamedOperandU8<"WaitEXP", NamedMatchClass<"WaitEXP">>;
+
} // End OperandType = "OPERAND_IMMEDIATE"
class KImmMatchClass<int size> : AsmOperandClass {
@@ -1223,10 +1340,18 @@ class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}
+class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
+ let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
+ let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
+}
+
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
+def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
+def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
+
class InputMods <AsmOperandClass matchClass> : Operand <i32> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_INPUT_MODS";
@@ -1241,19 +1366,28 @@ def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
+def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
+def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
+
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "RegOrImmWithInt"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithIntInputMods";
let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
+class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
+ let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
+ let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
+}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
+def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
+def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
class OpSelModsMatchClass : AsmOperandClass {
let Name = "OpSelMods";
@@ -1366,12 +1500,19 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
+def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
+def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">;
+def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
+
def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
+def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
+def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
+
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
@@ -1575,6 +1716,19 @@ class getVOP3SrcForVT<ValueType VT> {
);
}
+// Src2 of VOP3 DPP instructions cannot be a literal
+class getVOP3DPPSrcForVT<ValueType VT> {
+ bit isFP = isFloatType<VT>.ret;
+ RegisterOperand ret =
+ !if (!eq(VT.Value, i1.Value), SSrc_i1,
+ !if (isFP,
+ !if (!eq(VT.Value, f16.Value), VCSrc_f16,
+ !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)),
+ !if (!eq(VT.Value, i16.Value), VCSrc_b16,
+ !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16,
+ VCSrc_b32))));
+}
+
// Float or packed int
class isModifierType<ValueType SrcVT> {
bit ret = !or(!eq(SrcVT.Value, f16.Value),
@@ -1583,7 +1737,17 @@ class isModifierType<ValueType SrcVT> {
!eq(SrcVT.Value, v2f16.Value),
!eq(SrcVT.Value, v2i16.Value),
!eq(SrcVT.Value, v2f32.Value),
- !eq(SrcVT.Value, v2i32.Value));
+ !eq(SrcVT.Value, v2i32.Value),
+ !eq(SrcVT.Value, v4f16.Value),
+ !eq(SrcVT.Value, v4i16.Value),
+ !eq(SrcVT.Value, v4f32.Value),
+ !eq(SrcVT.Value, v4i32.Value),
+ !eq(SrcVT.Value, v8f16.Value),
+ !eq(SrcVT.Value, v8i16.Value),
+ !eq(SrcVT.Value, v8f32.Value),
+ !eq(SrcVT.Value, v8i32.Value),
+ !eq(SrcVT.Value, v16f16.Value),
+ !eq(SrcVT.Value, v16i16.Value));
}
// Return type of input modifiers operand for specified input operand
@@ -1611,6 +1775,17 @@ class getSrcModDPP <ValueType VT> {
Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}
+// Return type of input modifiers operand for specified input operand for DPP
+class getSrcModVOP3DPP <ValueType VT, bit EnableF32SrcMods> {
+ bit isFP = isFloatType<VT>.ret;
+ bit isPacked = isPackedType<VT>.ret;
+ Operand ret =
+ !if (isFP,
+ !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods,
+ FP32VCSrcInputMods),
+ !if (EnableF32SrcMods, FP32VCSrcInputMods, Int32VCSrcInputMods));
+}
+
// Return type of input modifiers operand specified input operand for SDWA
class getSrcModSDWA <ValueType VT> {
Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
@@ -1620,7 +1795,7 @@ class getSrcModSDWA <ValueType VT> {
}
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
-class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
+class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1
!if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
(ins)));
@@ -1715,19 +1890,21 @@ class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
dag opsel = (ins op_sel0:$op_sel);
- dag vop3pFields = (ins op_sel_hi0:$op_sel_hi, neg_lo0:$neg_lo, neg_hi0:$neg_hi);
+ dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
+ dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi));
+
dag ret = !con(base,
!if(HasOpSel, opsel,(ins)),
!if(IsVOP3P, vop3pFields,(ins)));
}
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
- RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp,
+ RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
- 1/*HasOpSel*/, 1/*IsVOP3P*/>.ret;
+ HasOpSel, 1/*IsVOP3P*/>.ret;
}
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
@@ -1741,8 +1918,8 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
}
class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod> {
+ RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret = !if (!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
@@ -1756,6 +1933,7 @@ class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass
// VOP1_DPP without modifiers
(ins OldRC:$old, Src0RC:$src0)
/* endif */),
+ !if (!eq(NumSrcArgs, 2),
!if (HasModifiers,
// VOP2_DPP with modifiers
(ins OldRC:$old,
@@ -1765,34 +1943,72 @@ class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass
// VOP2_DPP without modifiers
(ins OldRC:$old,
Src0RC:$src0, Src1RC:$src1)
- )));
+ )
+ /* NumSrcArgs == 3, VOP3 */,
+ !if (HasModifiers,
+ // VOP3_DPP with modifiers
+ (ins OldRC:$old,
+ Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2)
+ /* else */,
+ // VOP3_DPP without modifiers
+ (ins OldRC:$old,
+ Src0RC:$src0, Src1RC:$src1,
+ Src2RC:$src2)
+ )
+ /* endif */)));
}
class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod> {
- dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod>.ret,
+ RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
(ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}
class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod> {
- dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod>.ret,
+ RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
(ins FI:$fi));
}
class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
- int NumSrcArgs, bit HasModifiers,
- Operand Src0Mod, Operand Src1Mod> {
- dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod>.ret,
+ RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret,
(ins dpp8:$dpp8, FI:$fi));
}
+class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+ dag old = ( ins OldRC:$old );
+ dag base = VOP3Base;
+ dag ret = !con(
+ !if(!ne(NumSrcArgs, 0), old, (ins)),
+ base
+ );
+}
+
+class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+ dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret,
+ (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
+}
+
+class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+ dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs>.ret,
+ (ins FI:$fi));
+}
+
+class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs> {
+ dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs>.ret,
+ (ins dpp8:$dpp8, FI:$fi));
+}
// Ins for SDWA
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
@@ -1870,6 +2086,15 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
!if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}
+class getAsmVOPDPart <int NumSrcArgs, string XorY> {
+ string dst = "$vdst" # XorY;
+ string src0 = ", $src0" # XorY;
+ string src1 = ", $vsrc1" # XorY;
+ string ret = dst #
+ !if(!ge(NumSrcArgs, 1), src0, "") #
+ !if(!ge(NumSrcArgs, 2), src1, "");
+}
+
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
@@ -1890,7 +2115,7 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
- bit HasClamp> {
+ bit HasClamp, bit HasOpSel> {
string dst = "$vdst";
string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
@@ -1900,10 +2125,11 @@ class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
string clamp = !if(HasClamp, "$clamp", "");
+ string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");
// Each modifier is printed as an array of bits for each operand, so
// all operands are printed as part of src0_modifiers.
- string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
+ string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
}
class getAsmVOP3OpSel <int NumSrcArgs,
@@ -1930,8 +2156,8 @@ class getAsmVOP3OpSel <int NumSrcArgs,
string src2 = !if(Src2HasMods, fsrc2, isrc2);
string clamp = !if(HasClamp, "$clamp", "");
-
- string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
+ string omod = "";
+ string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
}
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
@@ -1955,15 +2181,63 @@ class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT
}
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
- : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> {
+ : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{
let ret = dst#args#" $dpp8$fi";
}
+class getAsmVOP3DPPBase <int NumSrcArgs, bit HasDst, bit HasClamp,
+ bit HasOpSel, bit HasOMod, bit IsVOP3P,
+ bit HasModifiers, bit Src0HasMods,
+ bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> {
+ string dst = !if(HasDst,
+ !if(!eq(DstVT.Size, 1),
+ "$sdst",
+ "$vdst"),
+ ""); // use $sdst for VOPC
+ string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+ string isrc1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1",
+ " $src1,"));
+ string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+ string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
+ string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
+ " $src1_modifiers,"));
+ string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
+
+ string src0 = !if(Src0HasMods, fsrc0, isrc0);
+ string src1 = !if(Src1HasMods, fsrc1, isrc1);
+ string src2 = !if(Src2HasMods, fsrc2, isrc2);
+ string opsel = !if(HasOpSel, "$op_sel", "");
+ string 3PMods = !if(IsVOP3P,
+ !if(HasOpSel, "$op_sel_hi", "")
+ #!if(HasModifiers, "$neg_lo$neg_hi", ""),
+ "");
+ string clamp = !if(HasClamp, "$clamp", "");
+ string omod = !if(HasOMod, "$omod", "");
+
+ string ret = dst#", "#src0#src1#src2#opsel#3PMods#clamp#omod;
+
+}
+
+class getAsmVOP3DPP<string base> {
+ string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+}
+
+class getAsmVOP3DPP16<string base> {
+ string ret = getAsmVOP3DPP<base>.ret # "$fi";
+}
+
+class getAsmVOP3DPP8<string base> {
+ string ret = base # " $dpp8$fi";
+}
+
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
string dst = !if(HasDst,
!if(!eq(DstVT.Size, 1),
- " vcc", // use vcc token as dst for VOPC instructioins
+ " vcc", // use vcc token as dst for VOPC instructions
"$vdst"),
"");
string src0 = "$src0_modifiers";
@@ -2056,6 +2330,12 @@ class getHasDPP <int NumSrcArgs> {
1);
}
+class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
+ ValueType Src1VT = i32> {
+ bit ret = !and(getHasDPP<NumSrcArgs>.ret,
+ !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
+}
+
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32> {
bit ret = !and(getHasDPP<NumSrcArgs>.ret,
@@ -2089,6 +2369,24 @@ class BitAnd<bit a, bit b> {
bit ret = !if(a, !if(b, 1, 0), 0);
}
+// DPP is available on the VOP3 encoding only when the destination and all
+// source operands are at most 32 bits wide.
+class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
+                 ValueType Src1VT = i32, ValueType Src2VT = i32> {
+  bit ret = !if(!eq(DstVT.Size, 64),
+                0, // 64-bit dst: no DPP for 64-bit operands
+                !if(!eq(Src0VT.Size, 64),
+                    0, // 64-bit src0: no DPP
+                    !if(!eq(Src1VT.Size, 64),
+                        0, // 64-bit src1: no DPP
+                        !if(!eq(Src2VT.Size, 64),
+                            0, // 64-bit src2: no DPP
+                            1  // all operands <= 32 bits: DPP available
+                        )
+                    )
+                )
+            );
+}
+
+
def PatGenMode {
int NoPattern = 0;
int Pattern = 1;
@@ -2106,15 +2404,20 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field ValueType Src1VT = ArgVT[2];
field ValueType Src2VT = ArgVT[3];
field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand DstRC64 = DstRC;
field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
- field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
+ field RegisterOperand Src1RC32 = RegisterOperand<getVregSrcForVT<Src1VT>.ret>;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
+ field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
+ field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
+ field RegisterOperand Src1VOP3DPP = VGPRSrc_32;
+ field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
@@ -2122,6 +2425,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
+ field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
+ field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, EnableF32SrcMods>.ret;
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
@@ -2169,15 +2474,20 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
- field bit HasExtDPP = getHasDPP<NumSrcArgs>.ret;
+ field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
+ field bit HasExtDPP = !if(!or(getHasDPP<NumSrcArgs>.ret,
+ HasExtVOP3DPP), 1, 0);
+ field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA9 = HasExtSDWA;
field int NeedPatGen = PatGenMode.NoPattern;
field bit IsMAI = 0;
+ field bit IsVOP3P = 0;
field bit IsDOT = 0;
field bit IsSingle = 0;
+ field bit IsWMMA = 0;
field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
@@ -2188,9 +2498,11 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
// VOP3b instructions are a special case with a second explicit
// output. This is manually overridden for them.
field dag Outs32 = Outs;
- field dag Outs64 = Outs;
+ field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
field dag OutsDPP8 = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
+ field dag OutsVOP3DPP = OutsDPP;
+ field dag OutsVOP3DPP8 = OutsDPP8;
field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
@@ -2198,7 +2510,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
HasIntClamp, HasModifiers, HasSrc2Mods,
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
- NumSrcArgs, HasClamp,
+ NumSrcArgs, HasClamp, HasOpSel,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp, HasOMod,
@@ -2206,21 +2518,35 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
getOpSelMod<Src1VT>.ret,
getOpSelMod<Src2VT>.ret>.ret;
field dag InsDPP = !if(HasExtDPP,
- getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
- HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
+ getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
(ins));
- field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
- HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
- field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
- Src0ModDPP, Src1ModDPP>.ret;
+ field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
+ HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
+ field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
+ NumSrcArgs, HasModifiers,
+ Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
+ field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
+ Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
+ Src0ModDPP, Src1ModDPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret;
+ field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret;
+ field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret;
+ field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
DstVT>.ret;
+ field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
+ // It is a slight misnomer to use the deferred f32 operand type for non-float
+ // operands, but this operand type will only be used if the other dual
+ // component is FMAAK or FMAMK
+ field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
+ field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
+ field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
- field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp>.ret;
+ field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
HasClamp,
HasSrc0FloatMods,
@@ -2232,15 +2558,24 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
// DPP8 encoding has no fields for modifiers, and it is enforced by setting
// the asm operand name via this HasModifiers flag
field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
+ field string AsmVOP3DPPBase = getAsmVOP3DPPBase<NumSrcArgs, HasDst, HasClamp,
+ HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasSrc0FloatMods, HasSrc1FloatMods,
+ HasSrc2FloatMods, DstVT >.ret;
+ field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3DPPBase>.ret;
+ field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3DPPBase>.ret;
+ field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
-
+ field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
+ field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
field string TieRegDPP = "$old";
}
-class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
+class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
  let HasExt = 0;
  let HasExtDPP = 0;
+  let HasExtVOP3DPP = 0;
+  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
@@ -2249,10 +2584,10 @@ class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
let NeedPatGen = mode;
}
-
def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
+def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
@@ -2264,6 +2599,7 @@ def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
+def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
@@ -2274,6 +2610,10 @@ def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;
+def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
+def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>;
+def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>;
+
def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
@@ -2343,6 +2683,18 @@ def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>;
def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;
+def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>;
+def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>;
+def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>;
+def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
+
+def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>;
+def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
+def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>;
+def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
+def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>;
+def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
+
class Commutable_REV <string revOp, bit isOrig> {
string RevOp = revOp;
bit IsOrig = isOrig;
@@ -2394,10 +2746,11 @@ multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;
- let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
+ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
- } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
+ } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
}
+
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
@@ -2470,6 +2823,7 @@ def getMCOpcodeGen : InstrMapping {
let RowFields = ["PseudoInstr"];
let ColFields = ["Subtarget"];
let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
+ // These columns must be kept in sync with the SIEncodingFamily enumeration.
let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
[!cast<string>(SIEncodingFamily.VI)],
[!cast<string>(SIEncodingFamily.SDWA)],
@@ -2482,7 +2836,9 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.GFX9)],
[!cast<string>(SIEncodingFamily.GFX10)],
[!cast<string>(SIEncodingFamily.SDWA10)],
- [!cast<string>(SIEncodingFamily.GFX90A)]];
+ [!cast<string>(SIEncodingFamily.GFX90A)],
+ [!cast<string>(SIEncodingFamily.GFX940)],
+ [!cast<string>(SIEncodingFamily.GFX11)]];
}
// Get equivalent SOPK instruction.
@@ -2510,14 +2866,6 @@ def getIfAddr64Inst : InstrMapping {
let ValueCols = [["1"]];
}
-def getMUBUFNoLdsInst : InstrMapping {
- let FilterClass = "MUBUFLdsTable";
- let RowFields = ["OpName"];
- let ColFields = ["IsLds"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
// Maps an atomic opcode to its returnless version.
def getAtomicNoRetOp : InstrMapping {
let FilterClass = "AtomicNoRet";
@@ -2580,6 +2928,14 @@ def getFlatScratchInstSSfromSV : InstrMapping {
let ValueCols = [["SS"]];
}
+// Maps an SVS-mode flat-scratch instruction to its SV-mode equivalent.
+def getFlatScratchInstSVfromSVS : InstrMapping {
+  let FilterClass = "FlatScratchInst";
+  let RowFields = ["SVOp"];
+  let ColFields = ["Mode"];
+  let KeyCol = ["SVS"];
+  let ValueCols = [["SV"]];
+}
+
def getFlatScratchInstSVfromSS : InstrMapping {
let FilterClass = "FlatScratchInst";
let RowFields = ["SVOp"];
@@ -2596,6 +2952,15 @@ def getMFMAEarlyClobberOp : InstrMapping {
let ValueCols = [["0"]];
}
+// Maps a v_cmp instruction to its v_cmpx equivalent.
+def getVCMPXOpFromVCMP : InstrMapping {
+  let FilterClass = "VCMPVCMPXTable";
+  let RowFields = ["VCMPOp"];
+  let ColFields = ["IsVCMPX"];
+  let KeyCol = ["0"];
+  let ValueCols = [["1"]];
+}
+
include "SIInstructions.td"
include "DSInstructions.td"