diff options
Diffstat (limited to 'lib/Target/AMDGPU/R600Instructions.td')
-rw-r--r-- | lib/Target/AMDGPU/R600Instructions.td | 124 |
1 files changed, 56 insertions, 68 deletions
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 33ef6a4e19ead..b6b576d952783 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -15,7 +15,7 @@ include "R600Intrinsics.td" include "R600InstrFormats.td" -class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : +class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern = []> : InstR600 <outs, ins, asm, pattern, NullALU> { let Namespace = "AMDGPU"; @@ -160,7 +160,8 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, let Inst{63-32} = Word1; } -class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node, +class R600_2OP_Helper <bits<11> inst, string opName, + SDPatternOperator node = null_frag, InstrItinClass itin = AnyALU> : R600_2OP <inst, opName, [(set R600_Reg32:$dst, (node R600_Reg32:$src0, @@ -283,7 +284,7 @@ class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask, } class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern> - : InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>, + : InstR600ISA <outs, (ins MEMxi:$src_gpr), !strconcat(" ", name), pattern>, VTX_WORD1_GPR { // Static fields @@ -328,18 +329,44 @@ class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern> class LoadParamFrag <PatFrag load_type> : PatFrag < (ops node:$ptr), (load_type node:$ptr), - [{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }] + [{ return isConstantLoad(cast<LoadSDNode>(N), 0) || + (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }] >; def load_param : LoadParamFrag<load>; def load_param_exti8 : LoadParamFrag<az_extloadi8>; def load_param_exti16 : LoadParamFrag<az_extloadi16>; -def isR600 : Predicate<"Subtarget->getGeneration() <= AMDGPUSubtarget::R700">; +class LoadVtxId1 <PatFrag load> : PatFrag < + (ops node:$ptr), (load node:$ptr), [{ + const MemSDNode *LD = cast<MemSDNode>(N); + return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || + (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + !isa<GlobalValue>(GetUnderlyingObject( + LD->getMemOperand()->getValue(), CurDAG->getDataLayout()))); +}]>; + +def vtx_id1_az_extloadi8 : LoadVtxId1 <az_extloadi8>; +def vtx_id1_az_extloadi16 : LoadVtxId1 <az_extloadi16>; +def vtx_id1_load : LoadVtxId1 <load>; + +class LoadVtxId2 <PatFrag load> : PatFrag < + (ops node:$ptr), (load node:$ptr), [{ + const MemSDNode *LD = cast<MemSDNode>(N); + return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + isa<GlobalValue>(GetUnderlyingObject( + LD->getMemOperand()->getValue(), CurDAG->getDataLayout())); +}]>; + +def vtx_id2_az_extloadi8 : LoadVtxId2 <az_extloadi8>; +def vtx_id2_az_extloadi16 : LoadVtxId2 <az_extloadi16>; +def vtx_id2_load : LoadVtxId2 <load>; + +def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">; def isR600toCayman : Predicate< - "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS">; + "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">; //===----------------------------------------------------------------------===// // R600 SDNodes @@ -407,8 +434,7 @@ def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR, def INTERP_VEC_LOAD : AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins i32imm:$src0), - "INTERP_LOAD $src0 : $dst", - [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>; + "INTERP_LOAD $src0 : $dst">; def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; @@ -474,28 +500,6 @@ class ExportBufWord1 { } multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { - def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg), - (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0), - 0, 61, 0, 7, 7, 7, cf_inst, 0) - >; - - def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg), - (ExportInst - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $reg, sub0), - 0, 61, 7, 0, 7, 7, cf_inst, 0) - >; - - def : Pat<(int_R600_store_dummy (i32 imm:$type)), - (ExportInst - (v4f32 (IMPLICIT_DEF)), imm:$type, 0, 7, 7, 7, 7, cf_inst, 0) - >; - - def : Pat<(int_R600_store_dummy 1), - (ExportInst - (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0) - >; - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), (ExportInst R600_Reg128:$src, imm:$type, imm:$base, @@ -507,22 +511,22 @@ multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { multiclass SteamOutputExportPattern<Instruction ExportInst, bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { // Stream0 - def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)), (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf0inst, 0)>; // Stream1 - def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)), (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf1inst, 0)>; // Stream2 - def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)), (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf2inst, 0)>; // Stream3 - def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src), + def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)), (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf3inst, 0)>; @@ -678,7 +682,7 @@ let Predicates = [isR600toCayman] in { def ADD : R600_2OP_Helper <0x0, "ADD", fadd>; // Non-IEEE MUL: 0 * anything = 0 -def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>; +def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE">; def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>; // TODO: Do these actually match the regular fmin/fmax behavior? def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>; @@ -733,6 +737,7 @@ def SETNE_DX10 : R600_2OP < [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE_NE))] >; +// FIXME: Need combine for AMDGPUfract def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>; def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; @@ -758,6 +763,13 @@ def : Pat < (MOV_IMM_I32 imm:$val) >; +def MOV_IMM_GLOBAL_ADDR : MOV_IMM<iPTR, i32imm>; +def : Pat < + (AMDGPUconstdata_ptr tglobaladdr:$addr), + (MOV_IMM_GLOBAL_ADDR tglobaladdr:$addr) +>; + + def MOV_IMM_F32 : MOV_IMM<f32, f32imm>; def : Pat < (fpimm:$val), @@ -851,7 +863,7 @@ class R600_TEX <bits<11> inst, string opName> : i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z, CT:$COORD_TYPE_W), - !strconcat(opName, + !strconcat(" ", opName, " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, " "$SRC_GPR.$srcx$srcy$srcz$srcw " "RID:$RESOURCE_ID SID:$SAMPLER_ID " @@ -1099,14 +1111,13 @@ class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper < // Clamped to maximum. class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper < - inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamped + inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamp > { let Itinerary = TransALU; } class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP_Helper < - inst, "RECIPSQRT_IEEE", AMDGPUrsq_legacy -> { + inst, "RECIPSQRT_IEEE", AMDGPUrsq> { let Itinerary = TransALU; } @@ -1135,11 +1146,6 @@ def FNEG_R600 : FNEG<R600_Reg32>; // FIXME: Should be predicated on unsafe fp math. multiclass DIV_Common <InstR600 recip_ieee> { def : Pat< - (int_AMDGPU_div f32:$src0, f32:$src1), - (MUL_IEEE $src0, (recip_ieee $src1)) ->; - -def : Pat< (fdiv f32:$src0, f32:$src1), (MUL_IEEE $src0, (recip_ieee $src1)) >; @@ -1147,12 +1153,6 @@ def : Pat< def : RcpPat<recip_ieee, f32>; } -class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> - : Pat < - (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w), - (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x)) ->; - //===----------------------------------------------------------------------===// // R600 / R700 Instructions //===----------------------------------------------------------------------===// @@ -1191,7 +1191,6 @@ let Predicates = [isR600] in { defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>; - def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; def : RsqPat<RECIPSQRT_IEEE_r600, f32>; @@ -1332,9 +1331,7 @@ def TXD: InstR600 < (outs R600_Reg128:$dst), (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), - "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", - [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, - imm:$resourceId, imm:$samplerId, imm:$textureTarget))], + "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", [], NullALU > { let TEXInst = 1; } @@ -1344,10 +1341,7 @@ def TXD_SHADOW: InstR600 < (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", - [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2, - imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))], - NullALU -> { + [], NullALU> { let TEXInst = 1; } } // End isPseudo = 1 @@ -1426,8 +1420,7 @@ def TEX_VTX_CONSTBUF : } def TEX_VTX_TEXBUF: - InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", - [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr">, VTX_WORD1_GPR, VTX_WORD0_eg { let VC_INST = 0; @@ -1542,8 +1535,9 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { //===---------------------------------------------------------------------===// let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, usesCustomInserter = 1 in { - def RETURN : ILFormat<(outs), (ins variable_ops), - "RETURN", [(IL_retflag)]>; + def RETURN : ILFormat<(outs), (ins variable_ops), + "RETURN", [(AMDGPUendpgm)] + >; } //===----------------------------------------------------------------------===// @@ -1729,12 +1723,6 @@ def : DwordAddrPat <i32, R600_Reg32>; } // End isR600toCayman Predicate -let Predicates = [isR600] in { -// Intrinsic patterns -defm : Expand24IBitOps<MULLO_INT_r600, ADD_INT>; -defm : Expand24UBitOps<MULLO_UINT_r600, ADD_INT>; -} // End isR600 - def getLDSNoRetOp : InstrMapping { let FilterClass = "R600_LDS_1A1D"; let RowFields = ["BaseOp"]; |