about summary refs log tree commit diff
path: root/llvm/lib/Target/AMDGPU/SOPInstructions.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SOPInstructions.td')
-rw-r--r-- llvm/lib/Target/AMDGPU/SOPInstructions.td 153
1 files changed, 109 insertions, 44 deletions
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 73ba2ae367f7..9d7b25d55217 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1,4 +1,4 @@
-//===-- SOPInstructions.td - SOP Instruction Defintions -------------------===//
+//===-- SOPInstructions.td - SOP Instruction Definitions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -97,6 +97,17 @@ class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
let has_sdst = 0;
}
+// Special case for movreld where sdst is treated as a use operand.
+class SOP1_32_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs), (ins SReg_32:$sdst, SSrc_b32:$src0),
+ "$sdst, $src0", pattern>;
+
+// Special case for movreld where sdst is treated as a use operand.
+class SOP1_64_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs), (ins SReg_64:$sdst, SSrc_b64:$src0),
+ "$sdst, $src0", pattern
+>;
+
class SOP1_0_32R <string opName, list<dag> pattern = []> : SOP1_Pseudo <
opName, (outs), (ins SReg_32:$src0),
"$src0", pattern> {
@@ -199,7 +210,9 @@ def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64",
def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">;
def S_FF0_I32_B64 : SOP1_32_64 <"s_ff0_i32_b64">;
-def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64">;
+def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64",
+ [(set i32:$sdst, (AMDGPUffbl_b32 i64:$src0))]
+>;
def S_FF1_I32_B32 : SOP1_32 <"s_ff1_i32_b32",
[(set i32:$sdst, (AMDGPUffbl_b32 i32:$src0))]
@@ -209,7 +222,9 @@ def S_FLBIT_I32_B32 : SOP1_32 <"s_flbit_i32_b32",
[(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))]
>;
-def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64">;
+def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64",
+ [(set i32:$sdst, (AMDGPUffbh_u32 i64:$src0))]
+>;
def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32",
[(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))]
>;
@@ -267,8 +282,8 @@ def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64">;
let Uses = [M0] in {
def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
def S_MOVRELS_B64 : SOP1_64R <"s_movrels_b64">;
-def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
-def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
+def S_MOVRELD_B32 : SOP1_32_movreld <"s_movreld_b32">;
+def S_MOVRELD_B64 : SOP1_64_movreld <"s_movreld_b64">;
} // End Uses = [M0]
let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in {
@@ -283,8 +298,8 @@ def S_MOV_FED_B32 : SOP1_32 <"s_mov_fed_b32">;
let SubtargetPredicate = HasVGPRIndexMode in {
def S_SET_GPR_IDX_IDX : SOP1_0_32<"s_set_gpr_idx_idx"> {
- let Uses = [M0];
- let Defs = [M0];
+ let Uses = [M0, MODE];
+ let Defs = [M0, MODE];
}
}
@@ -401,8 +416,14 @@ class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
class UniformBinFrag<SDPatternOperator Op> : PatFrag <
(ops node:$src0, node:$src1),
(Op $src0, $src1),
- [{ return !N->isDivergent(); }]
->;
+ [{ return !N->isDivergent(); }]> {
+ // This check is unnecessary as it's captured by the result register
+ // bank constraint.
+ //
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
+}
let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
@@ -444,9 +465,19 @@ def S_MAX_U32 : SOP2_32 <"s_max_u32",
} // End isCommutable = 1
} // End Defs = [SCC]
+class SelectPat<SDPatternOperator select> : PatFrag <
+ (ops node:$src1, node:$src2),
+ (select SCC, $src1, $src2),
+ [{ return N->getOperand(0)->hasOneUse() && !N->isDivergent(); }]
+>;
let Uses = [SCC] in {
- def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32">;
+ let AddedComplexity = 20 in {
+ def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32",
+ [(set i32:$sdst, (SelectPat<select> i32:$src0, i32:$src1))]
+ >;
+ }
+
def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">;
} // End Uses = [SCC]
@@ -524,22 +555,22 @@ let AddedComplexity = 1 in {
let Defs = [SCC] in {
// TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
- [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<shl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
- [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<shl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
- [(set SReg_32:$sdst, (srl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<srl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
- [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<srl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
- [(set SReg_32:$sdst, (sra (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<sra> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
- [(set SReg_64:$sdst, (sra (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<sra> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
} // End Defs = [SCC]
@@ -592,14 +623,26 @@ let SubtargetPredicate = isGFX9Plus in {
def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
let Defs = [SCC] in {
- def S_LSHL1_ADD_U32 : SOP2_32<"s_lshl1_add_u32">;
- def S_LSHL2_ADD_U32 : SOP2_32<"s_lshl2_add_u32">;
- def S_LSHL3_ADD_U32 : SOP2_32<"s_lshl3_add_u32">;
- def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32">;
+ def S_LSHL1_ADD_U32 : SOP2_32<"s_lshl1_add_u32",
+ [(set i32:$sdst, (shl1_add SSrc_b32:$src0, SSrc_b32:$src1))]
+ >;
+ def S_LSHL2_ADD_U32 : SOP2_32<"s_lshl2_add_u32",
+ [(set i32:$sdst, (shl2_add SSrc_b32:$src0, SSrc_b32:$src1))]
+ >;
+ def S_LSHL3_ADD_U32 : SOP2_32<"s_lshl3_add_u32",
+ [(set i32:$sdst, (shl3_add SSrc_b32:$src0, SSrc_b32:$src1))]
+ >;
+ def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32",
+ [(set i32:$sdst, (shl4_add SSrc_b32:$src0, SSrc_b32:$src1))]
+ >;
} // End Defs = [SCC]
- def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32">;
- def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32">;
+ let isCommutable = 1 in {
+ def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32",
+ [(set i32:$sdst, (UniformBinFrag<mulhu> SSrc_b32:$src0, SSrc_b32:$src1))]>;
+ def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32",
+ [(set i32:$sdst, (UniformBinFrag<mulhs> SSrc_b32:$src0, SSrc_b32:$src1))]>;
+ }
} // End SubtargetPredicate = isGFX9Plus
//===----------------------------------------------------------------------===//
@@ -760,7 +803,11 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
"$sdst, $simm16"
>;
+let hasSideEffects = 1 in {
+
let mayLoad = 1 in {
+// s_getreg_b32 should use hasSideEffects = 1 for tablegen to allow
+// its use in the readcyclecounter selection.
def S_GETREG_B32 : SOPK_Pseudo <
"s_getreg_b32",
(outs SReg_32:$sdst), (ins hwreg:$simm16),
@@ -768,14 +815,20 @@ def S_GETREG_B32 : SOPK_Pseudo <
>;
}
-let hasSideEffects = 1 in {
+let mayLoad = 0, mayStore =0 in {
def S_SETREG_B32 : SOPK_Pseudo <
"s_setreg_b32",
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
"$simm16, $sdst",
- [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]
->;
+ [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
+
+ // Use custom inserter to optimize some cases to
+ // S_DENORM_MODE/S_ROUND_MODE.
+ let usesCustomInserter = 1;
+ let Defs = [MODE];
+ let Uses = [MODE];
+}
// FIXME: Not on SI?
//def S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32">;
@@ -786,8 +839,11 @@ def S_SETREG_IMM32_B32 : SOPK_Pseudo <
"$simm16, $imm"> {
let Size = 8; // Unlike every other SOPK instruction.
let has_sdst = 0;
+ let Defs = [MODE];
+ let Uses = [MODE];
}
+}
} // End hasSideEffects = 1
class SOPK_WAITCNT<string opName, list<dag> pat=[]> :
@@ -920,12 +976,16 @@ def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>;
} // End SubtargetPredicate = isGFX8Plus
let SubtargetPredicate = HasVGPRIndexMode in {
+// Setting the GPR index mode is really writing the fields in the mode
+// register. We don't want to add mode register uses to every
+// instruction, and it's too complicated to deal with anyway. This is
+// modeled just as a side effect.
def S_SET_GPR_IDX_ON : SOPC <0x11,
(outs),
(ins SSrc_b32:$src0, GPRIdxMode:$src1),
"s_set_gpr_idx_on $src0,$src1"> {
- let Defs = [M0]; // No scc def
- let Uses = [M0]; // Other bits of m0 unmodified.
+ let Defs = [M0, MODE]; // No scc def
+ let Uses = [M0, MODE]; // Other bits of mode, m0 unmodified.
let hasSideEffects = 1; // Sets mode.gpr_idx_en
let FixedSize = 1;
}
@@ -1099,7 +1159,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
let mayStore = 1;
}
-let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
[(int_amdgcn_s_waitcnt timm:$simm16)]>;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
@@ -1112,8 +1172,8 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
"s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
}
def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
@@ -1138,14 +1198,14 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
[(int_amdgcn_s_incperflevel timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
}
def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
}
def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
let simm16 = 0;
@@ -1154,6 +1214,8 @@ def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
let SubtargetPredicate = HasVGPRIndexMode in {
def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> {
let simm16 = 0;
+ let Defs = [MODE];
+ let Uses = [MODE];
}
}
} // End hasSideEffects
@@ -1161,7 +1223,8 @@ def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> {
let SubtargetPredicate = HasVGPRIndexMode in {
def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16),
"s_set_gpr_idx_mode$simm16"> {
- let Defs = [M0];
+ let Defs = [M0, MODE];
+ let Uses = [MODE];
}
}
@@ -1176,13 +1239,15 @@ let SubtargetPredicate = isGFX10Plus in {
}
def S_WAITCNT_DEPCTR :
SOPP <0x023, (ins s16imm:$simm16), "s_waitcnt_depctr $simm16">;
- def S_ROUND_MODE :
- SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
- def S_DENORM_MODE :
- SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
- [(SIdenorm_mode (i32 timm:$simm16))]> {
- let hasSideEffects = 1;
- }
+
+ let hasSideEffects = 0, Uses = [MODE], Defs = [MODE] in {
+ def S_ROUND_MODE :
+ SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
+ def S_DENORM_MODE :
+ SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
+ [(SIdenorm_mode (i32 timm:$simm16))]>;
+ }
+
def S_TTRACEDATA_IMM :
SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
} // End SubtargetPredicate = isGFX10Plus
@@ -1223,7 +1288,7 @@ def : GCNPat <
// Same as a 32-bit inreg
def : GCNPat<
- (i32 (sext i16:$src)),
+ (i32 (UniformUnaryFrag<sext> i16:$src)),
(S_SEXT_I32_I16 $src)
>;
@@ -1250,7 +1315,7 @@ def : GCNPat<
>;
def : GCNPat <
- (i64 (sext i16:$src)),
+ (i64 (UniformUnaryFrag<sext> i16:$src)),
(REG_SEQUENCE SReg_64, (i32 (S_SEXT_I32_I16 $src)), sub0,
(i32 (COPY_TO_REGCLASS (S_ASHR_I32 (i32 (S_SEXT_I32_I16 $src)), (S_MOV_B32 (i32 31))), SGPR_32)), sub1)
>;