aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td93
1 files changed, 88 insertions, 5 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 95a1d8696347..ef652fce6548 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -571,6 +571,7 @@ let SubtargetPredicate = isGFX9Only in {
} // End SubtargetPredicate = isGFX9Only
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
+ let HasExtDPP = 1;
let HasExtSDWA = 1;
let HasExtSDWA9 = 1;
let HasExt = 1;
@@ -599,6 +600,7 @@ class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
(inst_sdwa 0, $src, 0, 0, index)
>;
+let SubtargetPredicate = isGFX9Only in {
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
(V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
@@ -617,6 +619,7 @@ foreach Index = [1, 2, 3] in {
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
+} // End SubtargetPredicate = isGFX9Only
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
@@ -626,11 +629,77 @@ class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
(inst_e32 $src))
>;
-foreach Index = [0, -1] in {
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
- V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
- def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
- V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+let SubtargetPredicate = isGFX9Only in {
+ foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
+ def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
+ }
+}
+
+
+// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions.
+def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> {
+ let HasOpSel = 1;
+ let HasExtVOP3DPP = 0;
+}
+
+def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]> {
+ let HasOpSel = 1;
+ let HasExtDPP = 1;
+ let HasExtVOP3DPP = 1;
+ let IsFP8 = 1;
+ let HasClamp = 0;
+ let HasOMod = 0;
+ let HasModifiers = 1;
+ let Src1VOP3DPP = Src1RC64;
+}
+
+let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0,
+ SchedRW = [WriteFloatCvt] in {
+ defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+ defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
+}
+
+class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
+ VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
+ (f32 (node i32:$src, index)),
+ !if (index,
+ (inst_e64 !if(index{0},
+ !if(index{1}, !or(SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1),
+ SRCMODS.OP_SEL_0),
+ !if(index{1}, SRCMODS.OP_SEL_1, 0)),
+ $src, 0),
+ (inst_e32 $src))
+>;
+
+let SubtargetPredicate = isGFX12Plus in {
+ foreach Index = [0, 1, 2, 3] in {
+ def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index,
+ V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>;
+ def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_bf8, Index,
+ V_CVT_F32_BF8_e32, V_CVT_F32_BF8_OP_SEL_e64>;
+ }
+}
+
+class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
+ VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
+ (v2f32 (node i32:$src, index)),
+ !if (index,
+ (inst_e64 SRCMODS.OP_SEL_0, $src, 0, 0, SRCMODS.NONE),
+ (inst_e32 $src))
+>;
+
+let SubtargetPredicate = isGFX12Plus in {
+ foreach Index = [0, -1] in {
+ def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
+ V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>;
+ def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index,
+ V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_OP_SEL_e64>;
+ }
}
let SubtargetPredicate = isGFX10Plus in {
@@ -853,6 +922,20 @@ multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;
+// Define VOP1 instructions using the pseudo instruction with its old profile and
+// VOP3 using the OpSel profile for the pseudo instruction.
+defm V_CVT_F32_FP8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06c, "V_CVT_F32_FP8", "v_cvt_f32_fp8">;
+defm V_CVT_F32_FP8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
+
+defm V_CVT_F32_BF8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06d, "V_CVT_F32_BF8", "v_cvt_f32_bf8">;
+defm V_CVT_F32_BF8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
+
+defm V_CVT_PK_F32_FP8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_FP8 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_OP_SEL", "v_cvt_pk_f32_fp8">;
+
+defm V_CVT_PK_F32_BF8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_BF8 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_OP_SEL", "v_cvt_pk_f32_bf8">;
+
defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
"V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d,