summary refs log tree commit diff
path: root/lib/Target/AMDGPU/SIInstructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/AMDGPU/SIInstructions.td')
-rw-r--r-- lib/Target/AMDGPU/SIInstructions.td 297
1 files changed, 200 insertions, 97 deletions
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 70f20bb69370..21984c6ad910 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
- (i32 imm:$attr)))]
+ [(set f32:$vdst, (int_amdgcn_interp_p1 f32:$vsrc,
+ (i32 timm:$attrchan), (i32 timm:$attr), M0))]
>;
let OtherPredicates = [has32BankLDS] in {
@@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
- (i32 imm:$attr)))]>;
+ [(set f32:$vdst, (int_amdgcn_interp_p2 f32:$src0, f32:$vsrc,
+ (i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
@@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
- (i32 imm:$attr)))]>;
+ [(set f32:$vdst, (int_amdgcn_interp_mov (i32 imm:$vsrc),
+ (i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
} // End Uses = [M0, EXEC]
@@ -92,6 +92,11 @@ def ATOMIC_FENCE : SPseudoInstSI<
let maybeAtomic = 1;
}
+def VOP_I64_I64_DPP : VOPProfile <[i64, i64, untyped, untyped]> {
+ let HasExt = 1;
+ let HasExtDPP = 1;
+}
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
// For use in patterns
@@ -107,10 +112,19 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
+// 64-bit vector move with dpp. Expanded post-RA.
+def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64_DPP> {
+ let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete.
+}
+
// Pseudoinstruction for @llvm.amdgcn.wqm. It is turned into a copy after the
// WQM pass processes it.
def WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
+// Pseudoinstruction for @llvm.amdgcn.softwqm. Like @llvm.amdgcn.wqm, it is
+// turned into a copy by the WQM pass, but does not seed WQM requirements.
+def SOFT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
+
// Pseudoinstruction for @llvm.amdgcn.wwm. It is turned into a copy post-RA, so
// that the @earlyclobber is respected. The @earlyclobber is to make sure that
// the instruction that defines $src0 (which is run in WWM) doesn't
@@ -345,13 +359,15 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
}
def SI_INIT_EXEC : SPseudoInstSI <
- (outs), (ins i64imm:$src), []> {
+ (outs), (ins i64imm:$src),
+ [(int_amdgcn_init_exec (i64 timm:$src))]> {
let Defs = [EXEC];
let usesCustomInserter = 1;
let isAsCheapAsAMove = 1;
let WaveSizePredicate = isWave64;
}
+// FIXME: Intrinsic should be mangled for wave size.
def SI_INIT_EXEC_LO : SPseudoInstSI <
(outs), (ins i32imm:$src), []> {
let Defs = [EXEC_LO];
@@ -360,12 +376,20 @@ def SI_INIT_EXEC_LO : SPseudoInstSI <
let WaveSizePredicate = isWave32;
}
+// FIXME: Wave32 version
def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
- (outs), (ins SSrc_b32:$input, i32imm:$shift), []> {
+ (outs), (ins SSrc_b32:$input, i32imm:$shift),
+ [(int_amdgcn_init_exec_from_input i32:$input, (i32 timm:$shift))]> {
let Defs = [EXEC];
let usesCustomInserter = 1;
}
+def : GCNPat <
+ (int_amdgcn_init_exec timm:$src),
+ (SI_INIT_EXEC_LO (as_i32imm imm:$src))> {
+ let WaveSizePredicate = isWave32;
+}
+
// Return for returning shaders to a shader variant epilog.
def SI_RETURN_TO_EPILOG : SPseudoInstSI <
(outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
@@ -604,25 +628,6 @@ def : GCNPat <
(SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0))
>;
-def : GCNPat <
- (AMDGPUinit_exec i64:$src),
- (SI_INIT_EXEC (as_i64imm $src))
-> {
- let WaveSizePredicate = isWave64;
-}
-
-def : GCNPat <
- (AMDGPUinit_exec i64:$src),
- (SI_INIT_EXEC_LO (as_i32imm $src))
-> {
- let WaveSizePredicate = isWave32;
-}
-
-def : GCNPat <
- (AMDGPUinit_exec_from_input i32:$input, i32:$shift),
- (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift))
->;
-
def : GCNPat<
(AMDGPUtrap timm:$trapid),
(S_TRAP $trapid)
@@ -740,22 +745,22 @@ def : GCNPat <
def : GCNPat <
(i32 (fp_to_sint f16:$src)),
- (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 $src))
+ (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src))
>;
def : GCNPat <
(i32 (fp_to_uint f16:$src)),
- (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 $src))
+ (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 VSrc_b32:$src))
>;
def : GCNPat <
(f16 (sint_to_fp i32:$src)),
- (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 $src))
+ (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 VSrc_b32:$src))
>;
def : GCNPat <
(f16 (uint_to_fp i32:$src)),
- (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 $src))
+ (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 VSrc_b32:$src))
>;
//===----------------------------------------------------------------------===//
@@ -808,8 +813,14 @@ def : GCNPat <
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
}
+
def : GCNPat <
- (i16 (add (i16 (trunc (getDivergentFrag<ctpop>.ret i32:$popcnt))), i16:$val)),
+ (i32 (ctpop i32:$popcnt)),
+ (V_BCNT_U32_B32_e64 VSrc_b32:$popcnt, (i32 0))
+>;
+
+def : GCNPat <
+ (i16 (add (i16 (trunc (i32 (getDivergentFrag<ctpop>.ret i32:$popcnt)))), i16:$val)),
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
@@ -1076,53 +1087,158 @@ def : GCNPat <
/********** ================================ **********/
// Prevent expanding both fneg and fabs.
+// TODO: Add IgnoredBySelectionDAG bit?
+let AddedComplexity = 1 in { // Prefer SALU to VALU patterns for DAG
def : GCNPat <
- (fneg (fabs f32:$src)),
- (S_OR_B32 $src, (S_MOV_B32(i32 0x80000000))) // Set sign bit
+ (fneg (fabs (f32 SReg_32:$src))),
+ (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit
>;
-// FIXME: Should use S_OR_B32
def : GCNPat <
- (fneg (fabs f64:$src)),
- (REG_SEQUENCE VReg_64,
- (i32 (EXTRACT_SUBREG f64:$src, sub0)),
- sub0,
- (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
- (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
- sub1)
+ (fabs (f32 SReg_32:$src)),
+ (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fffffff)))
+>;
+
+def : GCNPat <
+ (fneg (f32 SReg_32:$src)),
+ (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000)))
+>;
+
+def : GCNPat <
+ (fneg (f16 SReg_32:$src)),
+ (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000)))
+>;
+
+def : GCNPat <
+ (fneg (f16 VGPR_32:$src)),
+ (V_XOR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fabs (f16 SReg_32:$src)),
+ (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff)))
+>;
+
+def : GCNPat <
+ (fneg (fabs (f16 SReg_32:$src))),
+ (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
+>;
+
+def : GCNPat <
+ (fneg (fabs (f16 VGPR_32:$src))),
+ (V_OR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
+>;
+
+def : GCNPat <
+ (fneg (v2f16 SReg_32:$src)),
+ (S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000)))
+>;
+
+def : GCNPat <
+ (fabs (v2f16 SReg_32:$src)),
+ (S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fff7fff)))
+>;
+
+// This is really (fneg (fabs v2f16:$src))
+//
+// fabs is not reported as free because there is no modifier for it in
+// VOP3P instructions, so it is turned into the bit op.
+def : GCNPat <
+ (fneg (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))),
+ (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
>;
def : GCNPat <
- (fabs f32:$src),
- (S_AND_B32 $src, (S_MOV_B32 (i32 0x7fffffff)))
+ (fneg (v2f16 (fabs SReg_32:$src))),
+ (S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
+>;
+
+// FIXME: The implicit-def of scc from S_[X]OR_B32 is mishandled
+// def : GCNPat <
+// (fneg (f64 SReg_64:$src)),
+// (REG_SEQUENCE SReg_64,
+// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+// sub0,
+// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+// (i32 (S_MOV_B32 (i32 0x80000000)))),
+// sub1)
+// >;
+
+// def : GCNPat <
+// (fneg (fabs (f64 SReg_64:$src))),
+// (REG_SEQUENCE SReg_64,
+// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+// sub0,
+// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+// (S_MOV_B32 (i32 0x80000000))), // Set sign bit.
+// sub1)
+// >;
+
+} // End let AddedComplexity = 1
+
+def : GCNPat <
+ (fabs (f32 VGPR_32:$src)),
+ (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fneg (f32 VGPR_32:$src)),
+ (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fabs (f16 VGPR_32:$src)),
+ (V_AND_B32_e32 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
>;
def : GCNPat <
- (fneg f32:$src),
- (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000)))
+ (fneg (v2f16 VGPR_32:$src)),
+ (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
>;
def : GCNPat <
- (fabs f64:$src),
+ (fabs (v2f16 VGPR_32:$src)),
+ (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fneg (v2f16 (fabs VGPR_32:$src))),
+ (V_OR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit
+>;
+
+def : GCNPat <
+ (fabs (f64 VReg_64:$src)),
(REG_SEQUENCE VReg_64,
- (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_AND_B32_e64 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+ (V_AND_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
(V_MOV_B32_e32 (i32 0x7fffffff))), // Clear sign bit (AND with 0x7fffffff).
sub1)
>;
+// TODO: Use SGPR for constant
def : GCNPat <
- (fneg f64:$src),
+ (fneg (f64 VReg_64:$src)),
(REG_SEQUENCE VReg_64,
- (i32 (EXTRACT_SUBREG f64:$src, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+ (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
(i32 (V_MOV_B32_e32 (i32 0x80000000)))),
sub1)
>;
+// TODO: Use SGPR for constant
+def : GCNPat <
+ (fneg (fabs (f64 VReg_64:$src))),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
+ sub0,
+ (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
+ (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
+ sub1)
+>;
+
def : GCNPat <
(fcopysign f16:$src0, f16:$src1),
(V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
@@ -1154,45 +1270,6 @@ def : GCNPat <
(V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
>;
-def : GCNPat <
- (fneg f16:$src),
- (S_XOR_B32 $src, (S_MOV_B32 (i32 0x00008000)))
->;
-
-def : GCNPat <
- (fabs f16:$src),
- (S_AND_B32 $src, (S_MOV_B32 (i32 0x00007fff)))
->;
-
-def : GCNPat <
- (fneg (fabs f16:$src)),
- (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
->;
-
-def : GCNPat <
- (fneg v2f16:$src),
- (S_XOR_B32 $src, (S_MOV_B32 (i32 0x80008000)))
->;
-
-def : GCNPat <
- (fabs v2f16:$src),
- (S_AND_B32 $src, (S_MOV_B32 (i32 0x7fff7fff)))
->;
-
-// This is really (fneg (fabs v2f16:$src))
-//
-// fabs is not reported as free because there is modifier for it in
-// VOP3P instructions, so it is turned into the bit op.
-def : GCNPat <
- (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))),
- (S_OR_B32 $src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
->;
-
-def : GCNPat <
- (fneg (v2f16 (fabs v2f16:$src))),
- (S_OR_B32 $src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
->;
-
/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/
@@ -1544,7 +1621,7 @@ def : GCNPat <
(V_CVT_F16_F32_e32 (
V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
- $src))
+ SSrc_i1:$src))
>;
def : GCNPat <
@@ -1552,35 +1629,35 @@ def : GCNPat <
(V_CVT_F16_F32_e32 (
V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
- $src))
+ SSrc_i1:$src))
>;
def : GCNPat <
(f32 (sint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
- $src)
+ SSrc_i1:$src)
>;
def : GCNPat <
(f32 (uint_to_fp i1:$src)),
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
- $src)
+ SSrc_i1:$src)
>;
def : GCNPat <
(f64 (sint_to_fp i1:$src)),
(V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 -1),
- $src))
+ SSrc_i1:$src))
>;
def : GCNPat <
(f64 (uint_to_fp i1:$src)),
(V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
/*src1mod*/(i32 0), /*src1*/(i32 1),
- $src))
+ SSrc_i1:$src))
>;
//===----------------------------------------------------------------------===//
@@ -1788,6 +1865,22 @@ def : GCNPat <
(INSERT_SUBREG (IMPLICIT_DEF), $src0, sub0)
>;
+def : GCNPat <
+ (i64 (int_amdgcn_mov_dpp i64:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+ timm:$bound_ctrl)),
+ (V_MOV_B64_DPP_PSEUDO $src, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl))
+>;
+
+def : GCNPat <
+ (i64 (int_amdgcn_update_dpp i64:$old, i64:$src, timm:$dpp_ctrl, timm:$row_mask,
+ timm:$bank_mask, timm:$bound_ctrl)),
+ (V_MOV_B64_DPP_PSEUDO $old, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl))
+>;
+
//===----------------------------------------------------------------------===//
// Fract Patterns
//===----------------------------------------------------------------------===//
@@ -1915,3 +2008,13 @@ def : FP16Med3Pat<f16, V_MED3_F16>;
defm : Int16Med3Pat<V_MED3_I16, smin, smax, smax_oneuse, smin_oneuse>;
defm : Int16Med3Pat<V_MED3_U16, umin, umax, umax_oneuse, umin_oneuse>;
} // End Predicates = [isGFX9Plus]
+
+class AMDGPUGenericInstruction : GenericInstruction {
+ let Namespace = "AMDGPU";
+}
+
+def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}