summaryrefslogtreecommitdiff
path: root/lib/Target/AMDGPU/VOP3PInstructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/VOP3PInstructions.td')
-rw-r--r--lib/Target/AMDGPU/VOP3PInstructions.td99
1 files changed, 87 insertions, 12 deletions
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
index 3becf758aaa3e..eeee8b36c1753 100644
--- a/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -18,16 +18,25 @@ class VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag>
// Non-packed instructions that use the VOP3P encoding.
// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
-class VOP3_VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+class VOP3_VOP3PInst<string OpName, VOPProfile P, bit UseTiedOutput = 0,
+ SDPatternOperator node = null_frag> :
VOP3P_Pseudo<OpName, P> {
+ // These operands are only sort of f16 operands. Depending on
+ // op_sel_hi, these may be interpreted as f32. The inline immediate
+ // values are really f16 converted to f32, so we treat these as f16
+ // operands.
let InOperandList =
- (ins
- FP32InputMods:$src0_modifiers, VCSrc_f32:$src0,
- FP32InputMods:$src1_modifiers, VCSrc_f32:$src1,
- FP32InputMods:$src2_modifiers, VCSrc_f32:$src2,
- clampmod:$clamp,
- op_sel:$op_sel,
- op_sel_hi:$op_sel_hi);
+ !con(
+ !con(
+ (ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
+ FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
+ FP16InputMods:$src2_modifiers, VCSrc_f16:$src2,
+ clampmod:$clamp),
+ !if(UseTiedOutput, (ins VGPR_32:$vdst_in), (ins))),
+ (ins op_sel:$op_sel, op_sel_hi:$op_sel_hi));
+
+ let Constraints = !if(UseTiedOutput, "$vdst = $vdst_in", "");
+ let DisableEncoding = !if(UseTiedOutput, "$vdst_in", "");
let AsmOperands =
" $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp";
}
@@ -59,14 +68,80 @@ def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I1
def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
-// XXX - Commutable?
+
+let SubtargetPredicate = HasMadMixInsts in {
// These are VOP3a-like opcodes which accept no omod.
// Size of src arguments (16/32) is controlled by op_sel.
// For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi.
-def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_V2F16_V2F16_V2F16>>;
-def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
-def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
+let isCommutable = 1 in {
+def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
+
+// Clamp modifier is applied after conversion to f16.
+def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
+
+let ClampLo = 0, ClampHi = 1 in {
+def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
+}
+}
+
+def : GCNPat <
+ (f16 (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
+ (V_MAD_MIXLO_F16 $src0_modifiers, $src0,
+ $src1_modifiers, $src1,
+ $src2_modifiers, $src2,
+ DSTCLAMP.NONE,
+ (i32 (IMPLICIT_DEF)))
+>;
+
+// FIXME: Special case handling for maxhi (especially for clamp)
+// because dealing with the write to high half of the register is
+// difficult.
+def : GCNPat <
+ (build_vector f16:$elt0, (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
+ (v2f16 (V_MAD_MIXHI_F16 $src0_modifiers, $src0,
+ $src1_modifiers, $src1,
+ $src2_modifiers, $src2,
+ DSTCLAMP.NONE,
+ $elt0))
+>;
+
+def : GCNPat <
+ (build_vector
+ f16:$elt0,
+ (AMDGPUclamp (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
+ (v2f16 (V_MAD_MIXHI_F16 $src0_modifiers, $src0,
+ $src1_modifiers, $src1,
+ $src2_modifiers, $src2,
+ DSTCLAMP.ENABLE,
+ $elt0))
+>;
+
+def : GCNPat <
+ (AMDGPUclamp (build_vector
+ (fpround (fmad (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers)))),
+ (fpround (fmad (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
+ (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers)))))),
+ (v2f16 (V_MAD_MIXHI_F16 $hi_src0_modifiers, $hi_src0,
+ $hi_src1_modifiers, $hi_src1,
+ $hi_src2_modifiers, $hi_src2,
+ DSTCLAMP.ENABLE,
+ (V_MAD_MIXLO_F16 $lo_src0_modifiers, $lo_src0,
+ $lo_src1_modifiers, $lo_src1,
+ $lo_src2_modifiers, $lo_src2,
+ DSTCLAMP.ENABLE,
+ (i32 (IMPLICIT_DEF)))))
+>;
+} // End SubtargetPredicate = [HasMadMixInsts]
multiclass VOP3P_Real_vi<bits<10> op> {
def _vi : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.VI>,