summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/VOP2Instructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/VOP2Instructions.td')
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td76
1 files changed, 52 insertions, 24 deletions
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index aaadc3dbc7215..aa37dbf1418f9 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1,4 +1,4 @@
-//===-- VOP2Instructions.td - Vector Instruction Defintions ---------------===//
+//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -69,9 +69,13 @@ class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suf
let mayStore = 0;
let hasSideEffects = 0;
+ let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);
+
+ let mayRaiseFPException = ReadsModeReg;
+
let VOP2 = 1;
let VALU = 1;
- let Uses = [EXEC];
+ let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
let AsmVariantName = AMDGPUAsmVariants.Default;
}
@@ -459,17 +463,18 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
//===----------------------------------------------------------------------===//
defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
+let SubtargetPredicate = HasMadMacF32Insts in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
let isCommutable = 1 in {
-defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
+defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
-defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
-defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
+defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
+defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
-defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
+defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
@@ -484,12 +489,16 @@ defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
+let mayRaiseFPException = 0 in {
+let SubtargetPredicate = HasMadMacF32Insts in {
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
}
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
+} // End SubtargetPredicate = HasMadMacF32Insts
+}
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
@@ -529,8 +538,12 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
+
+let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
+}
+
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
@@ -541,14 +554,18 @@ defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmi
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7
-let SubtargetPredicate = isGFX6GFX7GFX10 in {
let isCommutable = 1 in {
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
+let OtherPredicates = [HasMadMacF32Insts] in
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32, srl>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32, sra>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32, shl>;
-} // End isCommutable = 1
} // End SubtargetPredicate = isGFX6GFX7GFX10
+let SubtargetPredicate = isGFX6GFX7 in {
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
+} // End SubtargetPredicate = isGFX6GFX7
+} // End isCommutable = 1
+
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
GCNPat<
@@ -617,15 +634,19 @@ defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>;
let isCommutable = 1 in {
let FPDPRounding = 1 in {
-defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
+defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
-defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
+defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
+
+let mayRaiseFPException = 0 in {
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
+}
+
} // End FPDPRounding = 1
-defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16, add>;
-defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16, sub>;
-defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
+defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
+defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
+defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
@@ -770,16 +791,16 @@ let Predicates = [Has16BitInsts] in {
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
- (add i16:$src0, (i16 NegSubInlineConst16:$src1)),
- (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
+ (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
+ (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;
let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
def : GCNPat<
- (i32 (zext (add i16:$src0, (i16 NegSubInlineConst16:$src1)))),
- (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
+ (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
+ (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;
defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
@@ -831,7 +852,7 @@ class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
string opName = ps.OpName, VOPProfile p = ps.Pfl> :
VOP2_DPP<op, ps, opName, p, 1> {
- let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
+ let AssemblerPredicate = HasDPP16;
let SubtargetPredicate = HasDPP16;
}
@@ -857,7 +878,7 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
let Inst{30-25} = op;
let Inst{31} = 0x0;
- let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
+ let AssemblerPredicate = HasDPP8;
let SubtargetPredicate = HasDPP8;
}
@@ -1250,9 +1271,9 @@ defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;
defm V_READLANE_B32 : VOP2Only_Real_gfx6_gfx7<0x001>;
-let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
+let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
defm V_WRITELANE_B32 : VOP2Only_Real_gfx6_gfx7<0x002>;
-} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)
+} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
let SubtargetPredicate = isGFX6GFX7 in {
defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
@@ -1261,6 +1282,7 @@ let SubtargetPredicate = isGFX6GFX7 in {
defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
+let OtherPredicates = [HasMadMacF32Insts] in
defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
@@ -1593,3 +1615,9 @@ let SubtargetPredicate = HasDot3Insts in {
let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
} // End SubtargetPredicate = HasPkFmacF16Inst
+
+let SubtargetPredicate = HasDot3Insts in {
+ // NB: Opcode conflicts with V_DOT2C_F32_F16
+ let DecoderNamespace = "GFX10_B" in
+ defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>;
+}