diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SOPInstructions.td')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 153 |
1 files changed, 109 insertions, 44 deletions
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 73ba2ae367f7..9d7b25d55217 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1,4 +1,4 @@ -//===-- SOPInstructions.td - SOP Instruction Defintions -------------------===// +//===-- SOPInstructions.td - SOP Instruction Definitions ------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -97,6 +97,17 @@ class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo < let has_sdst = 0; } +// Special case for movreld where sdst is treated as a use operand. +class SOP1_32_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo < + opName, (outs), (ins SReg_32:$sdst, SSrc_b32:$src0), + "$sdst, $src0", pattern>; + +// Special case for movreld where sdst is treated as a use operand. +class SOP1_64_movreld <string opName, list<dag> pattern=[]> : SOP1_Pseudo < + opName, (outs), (ins SReg_64:$sdst, SSrc_b64:$src0), + "$sdst, $src0", pattern +>; + class SOP1_0_32R <string opName, list<dag> pattern = []> : SOP1_Pseudo < opName, (outs), (ins SReg_32:$src0), "$src0", pattern> { @@ -199,7 +210,9 @@ def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64", def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">; def S_FF0_I32_B64 : SOP1_32_64 <"s_ff0_i32_b64">; -def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64">; +def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64", + [(set i32:$sdst, (AMDGPUffbl_b32 i64:$src0))] +>; def S_FF1_I32_B32 : SOP1_32 <"s_ff1_i32_b32", [(set i32:$sdst, (AMDGPUffbl_b32 i32:$src0))] @@ -209,7 +222,9 @@ def S_FLBIT_I32_B32 : SOP1_32 <"s_flbit_i32_b32", [(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))] >; -def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64">; +def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64", + [(set i32:$sdst, (AMDGPUffbh_u32 i64:$src0))] +>; def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32", [(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))] >; @@ -267,8 +282,8 @@ def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64">; let Uses = [M0] in { def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">; def S_MOVRELS_B64 : SOP1_64R <"s_movrels_b64">; -def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">; -def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">; +def S_MOVRELD_B32 : SOP1_32_movreld <"s_movreld_b32">; +def S_MOVRELD_B64 : SOP1_64_movreld <"s_movreld_b64">; } // End Uses = [M0] let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in { @@ -283,8 +298,8 @@ def S_MOV_FED_B32 : SOP1_32 <"s_mov_fed_b32">; let SubtargetPredicate = HasVGPRIndexMode in { def S_SET_GPR_IDX_IDX : SOP1_0_32<"s_set_gpr_idx_idx"> { - let Uses = [M0]; - let Defs = [M0]; + let Uses = [M0, MODE]; + let Defs = [M0, MODE]; } } @@ -401,8 +416,14 @@ class UniformUnaryFrag<SDPatternOperator Op> : PatFrag < class UniformBinFrag<SDPatternOperator Op> : PatFrag < (ops node:$src0, node:$src1), (Op $src0, $src1), - [{ return !N->isDivergent(); }] ->; + [{ return !N->isDivergent(); }]> { + // This check is unnecessary as it's captured by the result register + // bank constraint. + // + // FIXME: Should add a way for the emitter to recognize this is a + // trivially true predicate to eliminate the check. + let GISelPredicateCode = [{return true;}]; +} let Defs = [SCC] in { // Carry out goes to SCC let isCommutable = 1 in { @@ -444,9 +465,19 @@ def S_MAX_U32 : SOP2_32 <"s_max_u32", } // End isCommutable = 1 } // End Defs = [SCC] +class SelectPat<SDPatternOperator select> : PatFrag < + (ops node:$src1, node:$src2), + (select SCC, $src1, $src2), + [{ return N->getOperand(0)->hasOneUse() && !N->isDivergent(); }] +>; let Uses = [SCC] in { - def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32">; + let AddedComplexity = 20 in { + def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32", + [(set i32:$sdst, (SelectPat<select> i32:$src0, i32:$src1))] + >; + } + def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">; } // End Uses = [SCC] @@ -524,22 +555,22 @@ let AddedComplexity = 1 in { let Defs = [SCC] in { // TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3 def S_LSHL_B32 : SOP2_32 <"s_lshl_b32", - [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_32:$sdst, (UniformBinFrag<shl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64", - [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_64:$sdst, (UniformBinFrag<shl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B32 : SOP2_32 <"s_lshr_b32", - [(set SReg_32:$sdst, (srl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_32:$sdst, (UniformBinFrag<srl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64", - [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_64:$sdst, (UniformBinFrag<srl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_ASHR_I32 : SOP2_32 <"s_ashr_i32", - [(set SReg_32:$sdst, (sra (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_32:$sdst, (UniformBinFrag<sra> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64", - [(set SReg_64:$sdst, (sra (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] + [(set SReg_64:$sdst, (UniformBinFrag<sra> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; } // End Defs = [SCC] @@ -592,14 +623,26 @@ let SubtargetPredicate = isGFX9Plus in { def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">; let Defs = [SCC] in { - def S_LSHL1_ADD_U32 : SOP2_32<"s_lshl1_add_u32">; - def S_LSHL2_ADD_U32 : SOP2_32<"s_lshl2_add_u32">; - def S_LSHL3_ADD_U32 : SOP2_32<"s_lshl3_add_u32">; - def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32">; + def S_LSHL1_ADD_U32 : SOP2_32<"s_lshl1_add_u32", + [(set i32:$sdst, (shl1_add SSrc_b32:$src0, SSrc_b32:$src1))] + >; + def S_LSHL2_ADD_U32 : SOP2_32<"s_lshl2_add_u32", + [(set i32:$sdst, (shl2_add SSrc_b32:$src0, SSrc_b32:$src1))] + >; + def S_LSHL3_ADD_U32 : SOP2_32<"s_lshl3_add_u32", + [(set i32:$sdst, (shl3_add SSrc_b32:$src0, SSrc_b32:$src1))] + >; + def S_LSHL4_ADD_U32 : SOP2_32<"s_lshl4_add_u32", + [(set i32:$sdst, (shl4_add SSrc_b32:$src0, SSrc_b32:$src1))] + >; } // End Defs = [SCC] - def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32">; - def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32">; + let isCommutable = 1 in { + def S_MUL_HI_U32 : SOP2_32<"s_mul_hi_u32", + [(set i32:$sdst, (UniformBinFrag<mulhu> SSrc_b32:$src0, SSrc_b32:$src1))]>; + def S_MUL_HI_I32 : SOP2_32<"s_mul_hi_i32", + [(set i32:$sdst, (UniformBinFrag<mulhs> SSrc_b32:$src0, SSrc_b32:$src1))]>; + } } // End SubtargetPredicate = isGFX9Plus //===----------------------------------------------------------------------===// @@ -760,7 +803,11 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo < "$sdst, $simm16" >; +let hasSideEffects = 1 in { + let mayLoad = 1 in { +// s_getreg_b32 should use hasSideEffects = 1 for tablegen to allow +// its use in the readcyclecounter selection. def S_GETREG_B32 : SOPK_Pseudo < "s_getreg_b32", (outs SReg_32:$sdst), (ins hwreg:$simm16), @@ -768,14 +815,20 @@ def S_GETREG_B32 : SOPK_Pseudo < >; } -let hasSideEffects = 1 in { +let mayLoad = 0, mayStore =0 in { def S_SETREG_B32 : SOPK_Pseudo < "s_setreg_b32", (outs), (ins SReg_32:$sdst, hwreg:$simm16), "$simm16, $sdst", - [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))] ->; + [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> { + + // Use custom inserter to optimize some cases to + // S_DENORM_MODE/S_ROUND_MODE. + let usesCustomInserter = 1; + let Defs = [MODE]; + let Uses = [MODE]; +} // FIXME: Not on SI? //def S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32">; @@ -786,8 +839,11 @@ def S_SETREG_IMM32_B32 : SOPK_Pseudo < "$simm16, $imm"> { let Size = 8; // Unlike every other SOPK instruction. let has_sdst = 0; + let Defs = [MODE]; + let Uses = [MODE]; } +} } // End hasSideEffects = 1 class SOPK_WAITCNT<string opName, list<dag> pat=[]> : @@ -920,12 +976,16 @@ def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>; } // End SubtargetPredicate = isGFX8Plus let SubtargetPredicate = HasVGPRIndexMode in { +// Setting the GPR index mode is really writing the fields in the mode +// register. We don't want to add mode register uses to every +// instruction, and it's too complicated to deal with anyway. This is +// modeled just as a side effect. def S_SET_GPR_IDX_ON : SOPC <0x11, (outs), (ins SSrc_b32:$src0, GPRIdxMode:$src1), "s_set_gpr_idx_on $src0,$src1"> { - let Defs = [M0]; // No scc def - let Uses = [M0]; // Other bits of m0 unmodified. + let Defs = [M0, MODE]; // No scc def + let Uses = [M0, MODE]; // Other bits of mode, m0 unmodified. let hasSideEffects = 1; // Sets mode.gpr_idx_en let FixedSize = 1; } @@ -1099,7 +1159,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> { let mayStore = 1; } -let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16", [(int_amdgcn_s_waitcnt timm:$simm16)]>; def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">; @@ -1112,8 +1172,8 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">; def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16), "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> { let hasSideEffects = 1; - let mayLoad = 1; - let mayStore = 1; + let mayLoad = 0; + let mayStore = 0; } def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">; @@ -1138,14 +1198,14 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16", [(int_amdgcn_s_incperflevel timm:$simm16)]> { let hasSideEffects = 1; - let mayLoad = 1; - let mayStore = 1; + let mayLoad = 0; + let mayStore = 0; } def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16", [(int_amdgcn_s_decperflevel timm:$simm16)]> { let hasSideEffects = 1; - let mayLoad = 1; - let mayStore = 1; + let mayLoad = 0; + let mayStore = 0; } def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> { let simm16 = 0; @@ -1154,6 +1214,8 @@ def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> { let SubtargetPredicate = HasVGPRIndexMode in { def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> { let simm16 = 0; + let Defs = [MODE]; + let Uses = [MODE]; } } } // End hasSideEffects @@ -1161,7 +1223,8 @@ def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> { let SubtargetPredicate = HasVGPRIndexMode in { def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16), "s_set_gpr_idx_mode$simm16"> { - let Defs = [M0]; + let Defs = [M0, MODE]; + let Uses = [MODE]; } } @@ -1176,13 +1239,15 @@ let SubtargetPredicate = isGFX10Plus in { } def S_WAITCNT_DEPCTR : SOPP <0x023, (ins s16imm:$simm16), "s_waitcnt_depctr $simm16">; - def S_ROUND_MODE : - SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">; - def S_DENORM_MODE : - SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16", - [(SIdenorm_mode (i32 timm:$simm16))]> { - let hasSideEffects = 1; - } + + let hasSideEffects = 0, Uses = [MODE], Defs = [MODE] in { + def S_ROUND_MODE : + SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">; + def S_DENORM_MODE : + SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16", + [(SIdenorm_mode (i32 timm:$simm16))]>; + } + def S_TTRACEDATA_IMM : SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">; } // End SubtargetPredicate = isGFX10Plus @@ -1223,7 +1288,7 @@ def : GCNPat < // Same as a 32-bit inreg def : GCNPat< - (i32 (sext i16:$src)), + (i32 (UniformUnaryFrag<sext> i16:$src)), (S_SEXT_I32_I16 $src) >; @@ -1250,7 +1315,7 @@ def : GCNPat< >; def : GCNPat < - (i64 (sext i16:$src)), + (i64 (UniformUnaryFrag<sext> i16:$src)), (REG_SEQUENCE SReg_64, (i32 (S_SEXT_I32_I16 $src)), sub0, (i32 (COPY_TO_REGCLASS (S_ASHR_I32 (i32 (S_SEXT_I32_I16 $src)), (S_MOV_B32 (i32 31))), SGPR_32)), sub1) >; |