diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 6101 |
1 files changed, 2326 insertions, 3775 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index de4bf2ef3055..511cd875ac55 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1232,7 +1232,7 @@ multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr, // has 2 operands, neg the second one multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, - Operand IMMType, list<Predicate> Pred> { + list<Predicate> Pred> { def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), !strconcat( "{{ \n\t", @@ -1244,12 +1244,11 @@ multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, Requires<Pred>; } multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr, - string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, - list<Predicate> Pred = []> { + string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> { defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, - IntOp, IMMType, Pred> ; + IntOp, Pred> ; defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, - IntOp, IMMType, Pred> ; + IntOp, Pred> ; } // has 3 operands @@ -1357,21 +1356,21 @@ def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>; defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add", - atomic_load_sub_32_g, i32imm>; + atomic_load_sub_32_g>; defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add", - atomic_load_sub_64_g, i64imm>; + atomic_load_sub_64_g>; defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add", - atomic_load_sub_32_gen, i32imm>; + atomic_load_sub_32_gen>; defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", - ".add", atomic_load_sub_32_gen, i32imm>; + ".add", atomic_load_sub_32_gen>; defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add", - atomic_load_sub_32_s, i32imm>; + atomic_load_sub_32_s>; defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add", - atomic_load_sub_64_s, i64imm>; + atomic_load_sub_64_s>; defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add", - atomic_load_sub_64_gen, i64imm>; + atomic_load_sub_64_gen>; defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", - ".add", atomic_load_sub_64_gen, i64imm>; + ".add", atomic_load_sub_64_gen>; // atom_swap @@ -2465,2303 +2464,1563 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), // texmode_independent let IsTex = true, IsTexModeUnified = false in { // Texture fetch instructions using handles -def TEX_1D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), - "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), - "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod), - "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], $lod;", - []>; -def TEX_1D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_1D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), - "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), - "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], $lod;", - []>; -def TEX_1D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_1D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), - "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), - "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", - []>; -def TEX_1D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], $lod;", - []>; -def TEX_1D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_1D_ARRAY_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], $lod;", - []>; -def TEX_1D_ARRAY_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_1D_ARRAY_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], $lod;", - []>; -def TEX_1D_ARRAY_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_1D_ARRAY_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}];", - []>; -def TEX_1D_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], $lod;", - []>; -def TEX_1D_ARRAY_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; - -def TEX_2D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], $lod;", - []>; -def TEX_2D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_2D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], $lod;", - []>; -def TEX_2D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_2D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TEX_2D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], $lod;", - []>; -def TEX_2D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; +class TEX_1D_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; + +multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { + def _RR : TEX_1D_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_1D_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_1D_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_1D_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_2D_ARRAY_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_2D_ARRAY_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_2D_ARRAY_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_2D_ARRAY_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_2D_ARRAY_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_2D_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_2D_ARRAY_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; +defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;", + []>; + +multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_1D_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_1D_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_1D_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_1D_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_3D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_3D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; -def TEX_3D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_3D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; -def TEX_3D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_3D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_3D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; +defm TEX_1D_F32_F32_LEVEL : + TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_S32_F32_LEVEL : + TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_U32_F32_LEVEL : + TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}]," + " \\{$gradx\\}, \\{$grady\\};", + []>; + +multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_1D_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_1D_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_1D_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_1D_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_CUBE_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_CUBE_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_CUBE_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_CUBE_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_CUBE_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_CUBE_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", - []>; +defm TEX_1D_F32_F32_GRAD + : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_S32_F32_GRAD + : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_U32_F32_GRAD + : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];", + []>; + +multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_1D_ARRAY_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_1D_ARRAY_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_1D_ARRAY_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_1D_ARRAY_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_CUBE_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_CUBE_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", - []>; -def TEX_CUBE_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_CUBE_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", - []>; -def TEX_CUBE_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_CUBE_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", - []>; +defm TEX_1D_ARRAY_F32_F32 + : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_ARRAY_F32_S32 + : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_1D_ARRAY_S32_S32 + : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_1D_ARRAY_S32_F32 + : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_ARRAY_U32_S32 + : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_1D_ARRAY_U32_F32 + : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x\\}], $lod;", + []>; + +multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TLD4_R_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_G_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_B_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_A_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_R_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_G_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_B_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_A_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_R_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_G_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_B_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; -def TLD4_A_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, $s, \\{$x, $y\\}];", - []>; +defm TEX_1D_ARRAY_F32_F32_LEVEL + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_ARRAY_S32_F32_LEVEL + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_ARRAY_U32_F32_LEVEL + : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, + intype:$gradx, intype:$grady)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}]," + " \\{$gradx\\}, \\{$grady\\};", + []>; + +multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; } +defm TEX_1D_ARRAY_F32_F32_GRAD + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_1D_ARRAY_S32_F32_GRAD + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_1D_ARRAY_U32_F32_GRAD + : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];", + []>; + +multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { + def _RR : TEX_2D_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>; +} -// texmode_unified -let IsTex = true, IsTexModeUnified = true in { -// Texture fetch instructions using handles -def TEX_UNIFIED_1D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x), - "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x), - "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod), - "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_UNIFIED_1D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x), - "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x), - "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_UNIFIED_1D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x), - "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x), - "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", - []>; -def TEX_UNIFIED_1D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; +defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y\\}], $lod;", + []>; + +multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_2D_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_2D_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_2D_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_2D_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_UNIFIED_1D_ARRAY_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_UNIFIED_1D_ARRAY_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; -def TEX_UNIFIED_1D_ARRAY_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), - "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), - "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}];", - []>; -def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$lod), - "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], $lod;", - []>; -def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$gradx, Float32Regs:$grady), - "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", - []>; +defm TEX_2D_F32_F32_LEVEL : + TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_S32_F32_LEVEL : + TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_U32_F32_LEVEL : + TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}]," + " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", + []>; + +multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_2D_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_2D_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_2D_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_2D_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_UNIFIED_2D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_UNIFIED_2D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_UNIFIED_2D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), - "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TEX_UNIFIED_2D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$lod), - "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; +defm TEX_2D_F32_F32_GRAD : + TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_S32_F32_GRAD : + TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_U32_F32_GRAD : + TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $y\\}];", + []>; + +multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_2D_ARRAY_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_2D_ARRAY_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_2D_ARRAY_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_2D_ARRAY_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_UNIFIED_2D_ARRAY_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_UNIFIED_2D_ARRAY_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; -def TEX_UNIFIED_2D_ARRAY_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, - Int32Regs:$y), - "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y), - "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}];", - []>; -def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, Float32Regs:$lod), - "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], $lod;", - []>; -def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, - Float32Regs:$y, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$grady0, Float32Regs:$grady1), - "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " - "\\{$grady0, $grady1\\};", - []>; +defm TEX_2D_ARRAY_F32_F32 + : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_ARRAY_F32_S32 + : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_2D_ARRAY_S32_S32 + : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_2D_ARRAY_S32_F32 + : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_ARRAY_U32_S32 + : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_2D_ARRAY_U32_F32 + : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;", + []>; + +multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_UNIFIED_3D_F32_S32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_3D_F32_F32_GRAD - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; -def TEX_UNIFIED_3D_S32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_3D_S32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; -def TEX_UNIFIED_3D_U32_S32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$z), - "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z), - "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_3D_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, Float32Regs:$lod), - "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_3D_U32_F32_GRAD - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, - Float32Regs:$z, - Float32Regs:$gradx0, Float32Regs:$gradx1, - Float32Regs:$gradx2, Float32Regs:$grady0, - Float32Regs:$grady1, Float32Regs:$grady2), - "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], " - "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " - "\\{$grady0, $grady1, $grady2, $grady2\\};", - []>; +defm TEX_2D_ARRAY_F32_F32_LEVEL + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_ARRAY_S32_F32_LEVEL + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_ARRAY_U32_F32_LEVEL + : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $y\\}]," + " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", + []>; + +multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_UNIFIED_CUBE_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_CUBE_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; -def TEX_UNIFIED_CUBE_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$x, $y, $z, $z\\}], $lod;", - []>; +defm TEX_2D_ARRAY_F32_F32_GRAD + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_2D_ARRAY_S32_F32_GRAD + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_2D_ARRAY_U32_F32_GRAD + : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_3D_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}];", + []>; + +multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { + def _RR : TEX_3D_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_3D_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_3D_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_3D_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TEX_UNIFIED_CUBE_ARRAY_F32_F32 - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL - : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, - Float32Regs:$b, Float32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}], $lod;", - []>; -def TEX_UNIFIED_CUBE_ARRAY_S32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}], $lod;", - []>; -def TEX_UNIFIED_CUBE_ARRAY_U32_F32 - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), - "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}];", - []>; -def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$t, Int32Regs:$l, - Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, - Float32Regs:$lod), - "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " - "[$t, \\{$l, $x, $y, $z\\}], $lod;", - []>; +defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; + +multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_3D_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_3D_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_3D_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_3D_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def TLD4_UNIFIED_R_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_G_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_B_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_A_2D_F32_F32 - : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, - Float32Regs:$v2, Float32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_R_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_G_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_B_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_A_2D_S32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_R_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_G_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_B_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; -def TLD4_UNIFIED_A_2D_U32_F32 - : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, - Int32Regs:$v2, Int32Regs:$v3), - (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), - "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " - "[$t, \\{$x, $y\\}];", - []>; +defm TEX_3D_F32_F32_LEVEL + : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_3D_S32_F32_LEVEL + : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_3D_U32_F32_LEVEL + : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z, + intype :$gradx0, intype:$gradx1, + intype:$gradx2, intype:$grady0, + intype:$grady1, intype:$grady2)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}]," + " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," + " \\{$grady0, $grady1, $grady2, $grady2\\};", + []>; + +multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_3D_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_3D_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_3D_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_3D_GRAD_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; } +defm TEX_3D_F32_F32_GRAD + : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_3D_S32_F32_GRAD + : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_3D_U32_F32_GRAD + : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_CUBE_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}];", + []>; + +multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { + def _RR : TEX_CUBE_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_CUBE_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_CUBE_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_CUBE_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} +defm TEX_CUBE_F32_F32 + : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_CUBE_S32_F32 + : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_CUBE_U32_F32 + : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins intype:$x, intype:$y, intype:$z, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; + +multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -//=== Surface load instructions -// .clamp variant -let IsSuld = true in { -def SULD_1D_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];", - []>; +defm TEX_CUBE_F32_F32_LEVEL + : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_CUBE_S32_F32_LEVEL + : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_CUBE_U32_F32_LEVEL + : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $z\\}];", + []>; + +multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def SULD_1D_ARRAY_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; +defm TEX_CUBE_ARRAY_F32_F32 + : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_CUBE_ARRAY_S32_F32 + : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_CUBE_ARRAY_U32_F32 + : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$z, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;", + []>; + +multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def SULD_2D_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; +defm TEX_CUBE_ARRAY_F32_F32_LEVEL + : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_CUBE_ARRAY_S32_F32_LEVEL + : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_CUBE_ARRAY_U32_F32_LEVEL + : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TLD4_2D_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag texsamp> + : NVPTXInst<(outs outtype:$v0, outtype:$v1, + outtype:$v2, outtype:$v3), + !con(texsamp, (ins intype:$x, intype:$y)), + inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];", + []>; + +multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { + def _RR : TLD4_2D_base<inst, outtype, intype, + (ins Int64Regs:$t, Int64Regs:$s)>; + def _RI : TLD4_2D_base<inst, outtype, intype, + (ins Int64Regs:$t, i64imm:$s)>; + def _IR : TLD4_2D_base<inst, outtype, intype, + (ins i64imm:$t, Int64Regs:$s)>; + def _II : TLD4_2D_base<inst, outtype, intype, + (ins i64imm:$t, i64imm:$s)>; +} -def SULD_2D_ARRAY_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TLD4_R_2D_F32_F32 + : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_G_2D_F32_F32 + : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_B_2D_F32_F32 + : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_A_2D_F32_F32 + : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; + +defm TLD4_R_2D_S32_F32 + : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_G_2D_S32_F32 + : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_B_2D_S32_F32 + : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_A_2D_S32_F32 + : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; + +defm TLD4_R_2D_U32_F32 + : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_G_2D_U32_F32 + : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_B_2D_U32_F32 + : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_A_2D_U32_F32 + : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; -def SULD_3D_I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; } -let IsSuld = 2 in { -def SULD_1D_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_ARRAY_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; +// texmode_unified +let IsTex = true, IsTexModeUnified = true in { +// Texture fetch instructions using handles -def SULD_2D_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; +class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", + []>; + +multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>; +} -def SULD_2D_ARRAY_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TEX_UNIFIED_1D_F32_S32 + : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_F32_F32 + : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_S32_S32 + : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_S32_F32 + : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_U32_S32 + : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_U32_F32 + : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;", + []>; + +multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; +} -def SULD_3D_V2I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I64_CLAMP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; +defm TEX_UNIFIED_1D_F32_F32_LEVEL + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_S32_F32_LEVEL + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_U32_F32_LEVEL + : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } -let IsSuld = 3 in { -def SULD_1D_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; +defm TEX_UNIFIED_1D_F32_F32_GRAD + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_S32_F32_GRAD + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_U32_F32_GRAD + : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];", + []>; + +multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; +} -def SULD_1D_ARRAY_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; +defm TEX_UNIFIED_1D_ARRAY_F32_S32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_ARRAY_F32_F32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_S32_S32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_ARRAY_S32_F32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_U32_S32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_1D_ARRAY_U32_F32 + : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;", + []>; + +multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t)>; +} -def SULD_2D_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; +defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL + : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL + : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL + : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, + intype:$gradx, intype:$grady)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, + (ins i64imm:$t)>; +} -def SULD_2D_ARRAY_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD + : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD + : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD + : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];", + []>; + +multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; +} +defm TEX_UNIFIED_2D_F32_S32 + : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_F32_F32 + : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_S32_S32 + : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_S32_F32 + : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_U32_S32 + : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_U32_F32 + : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;", + []>; + +multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; +} -def SULD_3D_V4I8_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I16_CLAMP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I32_CLAMP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; +defm TEX_UNIFIED_2D_F32_F32_LEVEL + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_S32_F32_LEVEL + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_U32_F32_LEVEL + : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}]," + " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", + []>; +multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } +defm TEX_UNIFIED_2D_F32_F32_GRAD + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_S32_F32_GRAD + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_U32_F32_GRAD + : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];", + []>; +multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; +} -// .trap variant -let IsSuld = true in { -def SULD_1D_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];", - []>; +defm TEX_UNIFIED_2D_ARRAY_F32_S32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_ARRAY_F32_F32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_S32_S32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_ARRAY_S32_F32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_U32_S32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_2D_ARRAY_U32_F32 + : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$l, $x, $y, $y\\}], $lod;", + []>; +multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t)>; +} -def SULD_1D_ARRAY_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; +defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL + : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL + : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL + : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, + intype:$gradx0, intype:$gradx1, + intype:$grady0, intype:$grady1)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}]," + " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", + []>; +multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, + (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, + (ins i64imm:$t)>; +} -def SULD_2D_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; +defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD + : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD + : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD + : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", + []>; +multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>; +} -def SULD_2D_ARRAY_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TEX_UNIFIED_3D_F32_S32 + : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; +defm TEX_UNIFIED_3D_F32_F32 + : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_S32_S32 + : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_3D_S32_F32 + : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_U32_S32 + : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; +defm TEX_UNIFIED_3D_U32_F32 + : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; +} -def SULD_3D_I8_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I16_TRAP - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I32_TRAP - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I64_TRAP - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; +defm TEX_UNIFIED_3D_F32_F32_LEVEL + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_S32_F32_LEVEL + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_U32_F32_LEVEL + : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z, + intype:$gradx0, intype:$gradx1, + intype:$gradx2, intype:$grady0, + intype:$grady1, intype:$grady2)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," + " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," + " \\{$grady0, $grady1, $grady2, $grady2\\};", + []>; +multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; } -let IsSuld = 2 in { -def SULD_1D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; +defm TEX_UNIFIED_3D_F32_F32_GRAD + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_S32_F32_GRAD + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_3D_U32_F32_GRAD + : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", + []>; +multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>; +} -def SULD_1D_ARRAY_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; +defm TEX_UNIFIED_CUBE_F32_F32 + : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_S32_F32 + : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_U32_F32 + : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$x, $y, $z, $z\\}], $lod;", + []>; +multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t)>; +} -def SULD_2D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; +defm TEX_UNIFIED_CUBE_F32_F32_LEVEL + : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_S32_F32_LEVEL + : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_U32_F32_LEVEL + : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)), + inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];", + []>; +multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, + (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, + (ins i64imm:$t)>; +} -def SULD_2D_ARRAY_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 + : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; + +class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$r, outtype:$g, + outtype:$b, outtype:$a), + !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, + intype:$lod)), + inst # " \t\\{$r, $g, $b, $a\\}," + " [$t, \\{$l, $x, $y, $z\\}], $lod;", + []>; +multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, + (ins Int64Regs:$t)>; + def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, + (ins i64imm:$t)>; +} -def SULD_3D_V2I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I64_TRAP - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; +defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL + : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", + Float32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL + : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", + Int32Regs, Float32Regs>; +defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL + : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", + Int32Regs, Float32Regs>; + +class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype, dag tex> + : NVPTXInst<(outs outtype:$v0, outtype:$v1, + outtype:$v2, outtype:$v3), + !con(tex, (ins intype:$x, intype:$y)), + inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];", + []>; +multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype, + NVPTXRegClass intype> { + def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>; + def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; } -let IsSuld = 3 in { -def SULD_1D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; +defm TLD4_UNIFIED_R_2D_F32_F32 + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_UNIFIED_G_2D_F32_F32 + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_UNIFIED_B_2D_F32_F32 + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; +defm TLD4_UNIFIED_A_2D_F32_F32 + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; + +defm TLD4_UNIFIED_R_2D_S32_F32 + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_G_2D_S32_F32 + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_B_2D_S32_F32 + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_A_2D_S32_F32 + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; + +defm TLD4_UNIFIED_R_2D_U32_F32 + : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_G_2D_U32_F32 + : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_B_2D_U32_F32 + : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; +defm TLD4_UNIFIED_A_2D_U32_F32 + : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; -def SULD_1D_ARRAY_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; +} -def SULD_2D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +//=== Surface load instructions -def SULD_3D_V4I8_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I16_TRAP - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I32_TRAP - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; +let IsSuld = true in { + +class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$x)), + inst # " \\{$r\\}, [$s, \\{$x\\}];", + []>; +multiclass SULD_1D<string inst, NVPTXRegClass outtype> { + def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>; } -// .zero variant -let IsSuld = true in { -def SULD_1D_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];", - []>; +defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>; +defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>; +defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>; +defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>; + +defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>; +defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>; +defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>; +defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>; + +defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>; +defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>; +defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>; +defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>; + +class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), + inst # " \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> { + def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>; +} -def SULD_1D_ARRAY_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];", - []>; +defm SULD_1D_ARRAY_I8_CLAMP + : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>; +defm SULD_1D_ARRAY_I16_CLAMP + : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>; +defm SULD_1D_ARRAY_I32_CLAMP + : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>; +defm SULD_1D_ARRAY_I64_CLAMP + : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>; + +defm SULD_1D_ARRAY_I8_TRAP + : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>; +defm SULD_1D_ARRAY_I16_TRAP + : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>; +defm SULD_1D_ARRAY_I32_TRAP + : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>; +defm SULD_1D_ARRAY_I64_TRAP + : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>; + +defm SULD_1D_ARRAY_I8_ZERO + : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>; +defm SULD_1D_ARRAY_I16_ZERO + : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>; +defm SULD_1D_ARRAY_I32_ZERO + : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>; +defm SULD_1D_ARRAY_I64_ZERO + : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>; + +class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +multiclass SULD_2D<string inst, NVPTXRegClass outtype> { + def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>; +} -def SULD_2D_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];", - []>; +defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>; +defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>; +defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>; +defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>; + +defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>; +defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>; +defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>; +defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>; + +defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>; +defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>; +defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>; +defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>; + +class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> { + def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>; +} -def SULD_2D_ARRAY_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", - []>; +defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>; +defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>; +defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>; +defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>; + +defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>; +defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>; +defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>; +defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>; + +defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>; +defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>; +defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>; +defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>; + +class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), + inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +multiclass SULD_3D<string inst, NVPTXRegClass outtype> { + def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>; +} -def SULD_3D_I8_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I16_ZERO - : NVPTXInst<(outs Int16Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I32_ZERO - : NVPTXInst<(outs Int32Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_I64_ZERO - : NVPTXInst<(outs Int64Regs:$r), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; +defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>; +defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>; +defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>; +defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>; + +defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>; +defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>; +defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>; +defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>; + +defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>; +defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>; +defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>; +defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>; } let IsSuld = 2 in { -def SULD_1D_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_ARRAY_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", - []>; +class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$x)), + inst # " \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> { + def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>; +} -def SULD_2D_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", - []>; +defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>; +defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>; +defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>; +defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>; + +defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>; +defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>; +defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>; +defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>; + +defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>; +defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>; +defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>; +defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>; + +class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), + inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> { + def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; +} -def SULD_2D_ARRAY_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm SULD_1D_ARRAY_V2I8_CLAMP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>; +defm SULD_1D_ARRAY_V2I16_CLAMP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>; +defm SULD_1D_ARRAY_V2I32_CLAMP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>; +defm SULD_1D_ARRAY_V2I64_CLAMP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>; + +defm SULD_1D_ARRAY_V2I8_TRAP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>; +defm SULD_1D_ARRAY_V2I16_TRAP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>; +defm SULD_1D_ARRAY_V2I32_TRAP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>; +defm SULD_1D_ARRAY_V2I64_TRAP + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>; + +defm SULD_1D_ARRAY_V2I8_ZERO + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>; +defm SULD_1D_ARRAY_V2I16_ZERO + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>; +defm SULD_1D_ARRAY_V2I32_ZERO + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>; +defm SULD_1D_ARRAY_V2I64_ZERO + : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>; + +class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> { + def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>; +} + +defm SULD_2D_V2I8_CLAMP + : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>; +defm SULD_2D_V2I16_CLAMP + : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>; +defm SULD_2D_V2I32_CLAMP + : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>; +defm SULD_2D_V2I64_CLAMP + : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>; + +defm SULD_2D_V2I8_TRAP + : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>; +defm SULD_2D_V2I16_TRAP + : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>; +defm SULD_2D_V2I32_TRAP + : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>; +defm SULD_2D_V2I64_TRAP + : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>; + +defm SULD_2D_V2I8_ZERO + : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>; +defm SULD_2D_V2I16_ZERO + : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>; +defm SULD_2D_V2I32_ZERO + : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>; +defm SULD_2D_V2I64_ZERO + : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>; + +class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> { + def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; +} + +defm SULD_2D_ARRAY_V2I8_CLAMP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>; +defm SULD_2D_ARRAY_V2I16_CLAMP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>; +defm SULD_2D_ARRAY_V2I32_CLAMP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>; +defm SULD_2D_ARRAY_V2I64_CLAMP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>; + +defm SULD_2D_ARRAY_V2I8_TRAP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>; +defm SULD_2D_ARRAY_V2I16_TRAP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>; +defm SULD_2D_ARRAY_V2I32_TRAP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>; +defm SULD_2D_ARRAY_V2I64_TRAP + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>; + +defm SULD_2D_ARRAY_V2I8_ZERO + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>; +defm SULD_2D_ARRAY_V2I16_ZERO + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>; +defm SULD_2D_ARRAY_V2I32_ZERO + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>; +defm SULD_2D_ARRAY_V2I64_ZERO + : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>; + +class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), + inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> { + def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>; +} + +defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>; +defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>; +defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>; +defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>; + +defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>; +defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>; +defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>; +defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>; + +defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>; +defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>; +defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>; +defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>; -def SULD_3D_V2I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V2I64_ZERO - : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", - []>; } let IsSuld = 3 in { -def SULD_1D_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x), - "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", - []>; -def SULD_1D_ARRAY_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; -def SULD_1D_ARRAY_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), - "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x\\}];", - []>; +class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$x)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> { + def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>; +} -def SULD_2D_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; -def SULD_2D_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), - "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", - []>; +defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>; +defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>; +defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>; + +defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>; +defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>; +defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>; + +defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>; +defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>; +defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>; + +class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];", + []>; +multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> { + def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>; +} -def SULD_2D_ARRAY_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; -def SULD_2D_ARRAY_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), - "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$l, $x, $y, $y\\}];", - []>; +defm SULD_1D_ARRAY_V4I8_CLAMP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>; +defm SULD_1D_ARRAY_V4I16_CLAMP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>; +defm SULD_1D_ARRAY_V4I32_CLAMP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>; + +defm SULD_1D_ARRAY_V4I8_TRAP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>; +defm SULD_1D_ARRAY_V4I16_TRAP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>; +defm SULD_1D_ARRAY_V4I32_TRAP + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>; + +defm SULD_1D_ARRAY_V4I8_ZERO + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>; +defm SULD_1D_ARRAY_V4I16_ZERO + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>; +defm SULD_1D_ARRAY_V4I32_ZERO + : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>; + +class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> { + def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>; +} +defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>; +defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>; +defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>; + +defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>; +defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>; +defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>; + +defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>; +defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>; +defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>; + +class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> { + def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>; +} + +defm SULD_2D_ARRAY_V4I8_CLAMP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>; +defm SULD_2D_ARRAY_V4I16_CLAMP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>; +defm SULD_2D_ARRAY_V4I32_CLAMP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>; + +defm SULD_2D_ARRAY_V4I8_TRAP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>; +defm SULD_2D_ARRAY_V4I16_TRAP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>; +defm SULD_2D_ARRAY_V4I32_TRAP + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>; + +defm SULD_2D_ARRAY_V4I8_ZERO + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>; +defm SULD_2D_ARRAY_V4I16_ZERO + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>; +defm SULD_2D_ARRAY_V4I32_ZERO + : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>; + +class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf> + : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), + inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> { + def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>; + def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>; +} + +defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>; +defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>; +defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>; + +defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>; +defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>; +defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>; + +defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>; +defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>; +defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>; -def SULD_3D_V4I8_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I16_ZERO - : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; -def SULD_3D_V4I32_ZERO - : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), - "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, " - "[$s, \\{$x, $y, $z, $z\\}];", - []>; } //----------------------------------- @@ -4769,56 +4028,88 @@ def SULD_3D_V4I32_ZERO //----------------------------------- let IsSurfTexQuery = true in { -def TXQ_CHANNEL_ORDER +def TXQ_CHANNEL_ORDER_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.channel_order.b32 \t$d, [$a];", []>; -def TXQ_CHANNEL_DATA_TYPE +def TXQ_CHANNEL_ORDER_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.channel_order.b32 \t$d, [$a];", + []>; +def TXQ_CHANNEL_DATA_TYPE_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.channel_data_type.b32 \t$d, [$a];", []>; -def TXQ_WIDTH +def TXQ_CHANNEL_DATA_TYPE_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.channel_data_type.b32 \t$d, [$a];", + []>; +def TXQ_WIDTH_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.width.b32 \t$d, [$a];", []>; -def TXQ_HEIGHT +def TXQ_WIDTH_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.width.b32 \t$d, [$a];", + []>; +def TXQ_HEIGHT_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.height.b32 \t$d, [$a];", []>; -def TXQ_DEPTH +def TXQ_HEIGHT_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.height.b32 \t$d, [$a];", + []>; +def TXQ_DEPTH_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.depth.b32 \t$d, [$a];", []>; -def TXQ_ARRAY_SIZE +def TXQ_DEPTH_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.depth.b32 \t$d, [$a];", + []>; +def TXQ_ARRAY_SIZE_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.array_size.b32 \t$d, [$a];", []>; -def TXQ_NUM_SAMPLES +def TXQ_ARRAY_SIZE_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.array_size.b32 \t$d, [$a];", + []>; +def TXQ_NUM_SAMPLES_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.num_samples.b32 \t$d, [$a];", []>; -def TXQ_NUM_MIPMAP_LEVELS +def TXQ_NUM_SAMPLES_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.num_samples.b32 \t$d, [$a];", + []>; +def TXQ_NUM_MIPMAP_LEVELS_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "txq.num_mipmap_levels.b32 \t$d, [$a];", []>; +def TXQ_NUM_MIPMAP_LEVELS_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "txq.num_mipmap_levels.b32 \t$d, [$a];", + []>; } def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), - (TXQ_CHANNEL_ORDER Int64Regs:$a)>; + (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), - (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; + (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_width Int64Regs:$a), - (TXQ_WIDTH Int64Regs:$a)>; + (TXQ_WIDTH_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_height Int64Regs:$a), - (TXQ_HEIGHT Int64Regs:$a)>; + (TXQ_HEIGHT_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_depth Int64Regs:$a), - (TXQ_DEPTH Int64Regs:$a)>; + (TXQ_DEPTH_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), - (TXQ_ARRAY_SIZE Int64Regs:$a)>; + (TXQ_ARRAY_SIZE_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), - (TXQ_NUM_SAMPLES Int64Regs:$a)>; + (TXQ_NUM_SAMPLES_R Int64Regs:$a)>; def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), - (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>; + (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>; //----------------------------------- @@ -4826,44 +4117,68 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), //----------------------------------- let IsSurfTexQuery = true in { -def SUQ_CHANNEL_ORDER +def SUQ_CHANNEL_ORDER_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.channel_order.b32 \t$d, [$a];", []>; -def SUQ_CHANNEL_DATA_TYPE +def SUQ_CHANNEL_ORDER_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.channel_order.b32 \t$d, [$a];", + []>; +def SUQ_CHANNEL_DATA_TYPE_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.channel_data_type.b32 \t$d, [$a];", []>; -def SUQ_WIDTH +def SUQ_CHANNEL_DATA_TYPE_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.channel_data_type.b32 \t$d, [$a];", + []>; +def SUQ_WIDTH_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.width.b32 \t$d, [$a];", []>; -def SUQ_HEIGHT +def SUQ_WIDTH_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.width.b32 \t$d, [$a];", + []>; +def SUQ_HEIGHT_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.height.b32 \t$d, [$a];", []>; -def SUQ_DEPTH +def SUQ_HEIGHT_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.height.b32 \t$d, [$a];", + []>; +def SUQ_DEPTH_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.depth.b32 \t$d, [$a];", []>; -def SUQ_ARRAY_SIZE +def SUQ_DEPTH_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.depth.b32 \t$d, [$a];", + []>; +def SUQ_ARRAY_SIZE_R : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), "suq.array_size.b32 \t$d, [$a];", []>; +def SUQ_ARRAY_SIZE_I + : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), + "suq.array_size.b32 \t$d, [$a];", + []>; } def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), - (SUQ_CHANNEL_ORDER Int64Regs:$a)>; + (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), - (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; + (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_width Int64Regs:$a), - (SUQ_WIDTH Int64Regs:$a)>; + (SUQ_WIDTH_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_height Int64Regs:$a), - (SUQ_HEIGHT Int64Regs:$a)>; + (SUQ_HEIGHT_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_depth Int64Regs:$a), - (SUQ_DEPTH Int64Regs:$a)>; + (SUQ_DEPTH_R Int64Regs:$a)>; def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), - (SUQ_ARRAY_SIZE Int64Regs:$a)>; + (SUQ_ARRAY_SIZE_R Int64Regs:$a)>; //===- Handle Query -------------------------------------------------------===// @@ -4885,1329 +4200,522 @@ def ISTYPEP_TEXTURE //===- Surface Stores -----------------------------------------------------===// let IsSust = true in { -// Unformatted -// .clamp variant -def SUST_B_1D_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_1D_ARRAY_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), - "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), - "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g), - "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, - Int64Regs:$g), - "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_2D_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g), - "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, - Int64Regs:$g), - "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_2D_ARRAY_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r), - "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r), - "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g), - "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r, Int64Regs:$g), - "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_3D_B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r), - "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r), - "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_V2B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g), - "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B64_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r, Int64Regs:$g), - "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V4B8_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B16_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B32_CLAMP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -// .trap variant -def SUST_B_1D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_1D_ARRAY_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), - "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), - "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g), - "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, - Int64Regs:$g), - "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_2D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g), - "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, - Int64Regs:$g), - "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_2D_ARRAY_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r), - "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r), - "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g), - "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r, Int64Regs:$g), - "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -def SUST_B_3D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r), - "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r), - "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g), - "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B64_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r, Int64Regs:$g), - "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; - - -// .zero variant -def SUST_B_1D_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_B_1D_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; +class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, intype:$r)), + inst # " \t[$s, \\{$x\\}], \\{$r\\};", + []>; +multiclass SUST_1D<string inst, NVPTXRegClass intype> { + def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>; +} -def SUST_B_1D_ARRAY_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), - "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), - "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_B_1D_ARRAY_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g), - "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, - Int64Regs:$g), - "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_B_1D_ARRAY_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_1D_ARRAY_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>; +defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>; +defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>; +defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>; + +defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>; +defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>; +defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>; +defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>; + +defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>; +defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>; +defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>; +defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>; + +defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>; +defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>; +defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>; + +class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)), + inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> { + def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>; +} +defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>; +defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>; +defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>; +defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>; +defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>; +defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>; +defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>; + +defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>; +defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>; +defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>; +defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>; + +defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>; +defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>; +defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>; + +class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g, + intype:$b, intype:$a)), + inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> { + def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>; +} -def SUST_B_2D_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g), - "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, - Int64Regs:$g), - "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_B_2D_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>; +defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>; +defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>; + +defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; +defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>; +defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>; + +defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; +defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>; +defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>; + +defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; +defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>; +defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>; + +class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), + inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> { + def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>; +} +defm SUST_B_1D_ARRAY_B8_CLAMP + : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_B16_CLAMP + : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_B32_CLAMP + : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>; +defm SUST_B_1D_ARRAY_B64_CLAMP + : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>; + +defm SUST_B_1D_ARRAY_B8_TRAP + : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_B16_TRAP + : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_B32_TRAP + : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>; +defm SUST_B_1D_ARRAY_B64_TRAP + : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>; + +defm SUST_B_1D_ARRAY_B8_ZERO + : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_B16_ZERO + : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_B32_ZERO + : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>; +defm SUST_B_1D_ARRAY_B64_ZERO + : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>; + +defm SUST_P_1D_ARRAY_B8_TRAP + : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_B16_TRAP + : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_B32_TRAP + : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>; + +class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, + intype:$r, intype:$g)), + inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> { + def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; +} -def SUST_B_2D_ARRAY_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r), - "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r), - "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_B_2D_ARRAY_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g), - "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int64Regs:$r, Int64Regs:$g), - "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_2D_ARRAY_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_2D_ARRAY_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_1D_ARRAY_V2B8_CLAMP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B16_CLAMP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B32_CLAMP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>; +defm SUST_B_1D_ARRAY_V2B64_CLAMP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_1D_ARRAY_V2B8_TRAP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B16_TRAP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B32_TRAP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>; +defm SUST_B_1D_ARRAY_V2B64_TRAP + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>; + +defm SUST_B_1D_ARRAY_V2B8_ZERO + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B16_ZERO + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_V2B32_ZERO + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>; +defm SUST_B_1D_ARRAY_V2B64_ZERO + : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>; + +defm SUST_P_1D_ARRAY_V2B8_TRAP + : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_V2B16_TRAP + : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_V2B32_TRAP + : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>; + +class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, + intype:$r, intype:$g, intype:$b, intype:$a)), + inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> { + def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>; +} +defm SUST_B_1D_ARRAY_V4B8_CLAMP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B16_CLAMP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B32_CLAMP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>; + +defm SUST_B_1D_ARRAY_V4B8_TRAP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B16_TRAP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B32_TRAP + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>; + +defm SUST_B_1D_ARRAY_V4B8_ZERO + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B16_ZERO + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>; +defm SUST_B_1D_ARRAY_V4B32_ZERO + : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>; + +defm SUST_P_1D_ARRAY_V4B8_TRAP + : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_V4B16_TRAP + : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>; +defm SUST_P_1D_ARRAY_V4B32_TRAP + : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>; + +class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)), + inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +multiclass SUST_2D<string inst, NVPTXRegClass intype> { + def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>; +} -def SUST_B_3D_B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r), - "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r), - "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_B_3D_V2B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g), - "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V2B64_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int64Regs:$r, Int64Regs:$g), - "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_B_3D_V4B8_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B16_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_B_3D_V4B32_ZERO - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>; +defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>; +defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>; +defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>; + +defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>; +defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>; +defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>; +defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>; + +defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>; +defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>; +defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>; +defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>; + +defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>; +defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>; +defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>; + +class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, + intype:$r, intype:$g)), + inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> { + def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>; +} +defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>; +defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>; +defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>; +defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>; +defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>; +defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>; +defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>; + +defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>; +defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>; +defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>; +defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>; + +defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>; +defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>; +defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>; + +class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, + intype:$r, intype:$g, intype:$b, intype:$a)), + inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> { + def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>; +} +defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>; +defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>; +defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>; + +defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>; +defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>; +defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>; + +defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>; +defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>; +defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>; + +defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>; +defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>; +defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>; + +class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + intype:$r)), + inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> { + def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>; +} -// Formatted +defm SUST_B_2D_ARRAY_B8_CLAMP + : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_B16_CLAMP + : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_B32_CLAMP + : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>; +defm SUST_B_2D_ARRAY_B64_CLAMP + : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>; + +defm SUST_B_2D_ARRAY_B8_TRAP + : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_B16_TRAP + : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_B32_TRAP + : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>; +defm SUST_B_2D_ARRAY_B64_TRAP + : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>; + +defm SUST_B_2D_ARRAY_B8_ZERO + : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_B16_ZERO + : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_B32_ZERO + : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>; +defm SUST_B_2D_ARRAY_B64_ZERO + : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>; + +defm SUST_P_2D_ARRAY_B8_TRAP + : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_B16_TRAP + : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_B32_TRAP + : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>; + +class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + intype:$r, intype:$g)), + inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};", + []>; +multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> { + def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; +} -def SUST_P_1D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_P_1D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_P_1D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", - []>; -def SUST_P_1D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_P_1D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, - Int16Regs:$b, Int16Regs:$a), - "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; -def SUST_P_1D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, - Int32Regs:$b, Int32Regs:$a), - "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_2D_ARRAY_V2B8_CLAMP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B16_CLAMP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B32_CLAMP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>; +defm SUST_B_2D_ARRAY_V2B64_CLAMP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_2D_ARRAY_V2B8_TRAP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B16_TRAP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B32_TRAP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>; +defm SUST_B_2D_ARRAY_V2B64_TRAP + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>; + +defm SUST_B_2D_ARRAY_V2B8_ZERO + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B16_ZERO + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_V2B32_ZERO + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>; +defm SUST_B_2D_ARRAY_V2B64_ZERO + : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>; + +defm SUST_P_2D_ARRAY_V2B8_TRAP + : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_V2B16_TRAP + : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_V2B32_TRAP + : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>; + +class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + intype:$r, intype:$g, intype:$b, intype:$a)), + inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> { + def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>; +} +defm SUST_B_2D_ARRAY_V4B8_CLAMP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B16_CLAMP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B32_CLAMP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>; + +defm SUST_B_2D_ARRAY_V4B8_TRAP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B16_TRAP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B32_TRAP + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>; + +defm SUST_B_2D_ARRAY_V4B8_ZERO + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B16_ZERO + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>; +defm SUST_B_2D_ARRAY_V4B32_ZERO + : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>; + +defm SUST_P_2D_ARRAY_V4B8_TRAP + : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_V4B16_TRAP + : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>; +defm SUST_P_2D_ARRAY_V4B32_TRAP + : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>; + +class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + intype:$r)), + inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +multiclass SUST_3D<string inst, NVPTXRegClass intype> { + def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>; +} -def SUST_P_1D_ARRAY_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_P_1D_ARRAY_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), - "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_P_1D_ARRAY_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), - "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", - []>; -def SUST_P_1D_ARRAY_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_ARRAY_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g), - "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_ARRAY_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g), - "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", - []>; -def SUST_P_1D_ARRAY_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_1D_ARRAY_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_1D_ARRAY_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>; +defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>; +defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>; +defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>; + +defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>; +defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>; +defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>; +defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>; + +defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>; +defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>; +defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>; +defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>; + +defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>; +defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>; +defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>; + +class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + intype:$r, intype:$g)), + inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};", + []>; +multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> { + def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>; +} +defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>; +defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>; +defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>; +defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>; + +defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>; +defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>; +defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>; +defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>; + +defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>; +defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>; +defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>; +defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>; + +defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>; +defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>; +defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>; + +class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf> + : NVPTXInst<(outs), + !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + intype:$r, intype:$g, intype:$b, intype:$a)), + inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};", + []>; +multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> { + def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>; + def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>; +} -def SUST_P_2D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_P_2D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g), - "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_P_2D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g), - "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", - []>; -def SUST_P_2D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_2D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, - Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_2D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, - Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>; +defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>; +defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>; +defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>; +defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>; +defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>; -def SUST_P_2D_ARRAY_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_ARRAY_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r), - "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_ARRAY_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r), - "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", - []>; -def SUST_P_2D_ARRAY_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_2D_ARRAY_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g), - "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_2D_ARRAY_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g), - "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_2D_ARRAY_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_2D_ARRAY_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_2D_ARRAY_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " - "\\{$r, $g, $b, $a\\};", - []>; +defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>; +defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>; +defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>; +defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>; +defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>; +defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>; -def SUST_P_3D_B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_P_3D_B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r), - "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_P_3D_B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r), - "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", - []>; -def SUST_P_3D_V2B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_3D_V2B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g), - "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_3D_V2B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g), - "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g\\};", - []>; -def SUST_P_3D_V4B8_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_3D_V4B16_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; -def SUST_P_3D_V4B32_TRAP - : NVPTXInst<(outs), - (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, - Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " - "\\{$r, $g, $b, $a\\};", - []>; } // Surface store instruction patterns @@ -6217,248 +4725,248 @@ def SUST_P_3D_V4B32_TRAP // .clamp variant def : Pat<(int_nvvm_sust_b_1d_i8_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i16_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i64_clamp Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i64_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -6467,77 +4975,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp def : Pat<(int_nvvm_sust_b_3d_i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B8_CLAMP Int64Regs:$s, + (SUST_B_3D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B16_CLAMP Int64Regs:$s, + (SUST_B_3D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r), - (SUST_B_3D_B32_CLAMP Int64Regs:$s, + (SUST_B_3D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i64_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r), - (SUST_B_3D_B64_CLAMP Int64Regs:$s, + (SUST_B_3D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B8_CLAMP Int64Regs:$s, + (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B16_CLAMP Int64Regs:$s, + (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g), - (SUST_B_3D_V2B32_CLAMP Int64Regs:$s, + (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g), - (SUST_B_3D_V2B64_CLAMP Int64Regs:$s, + (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B8_CLAMP Int64Regs:$s, + (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B16_CLAMP Int64Regs:$s, + (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_3D_V4B32_CLAMP Int64Regs:$s, + (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -6545,248 +5053,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp // .trap variant def : Pat<(int_nvvm_sust_b_1d_i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i64_trap Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i64_trap Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i64_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i64_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i64_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i64_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -6795,77 +5303,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap def : Pat<(int_nvvm_sust_b_3d_i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B8_TRAP Int64Regs:$s, + (SUST_B_3D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B16_TRAP Int64Regs:$s, + (SUST_B_3D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r), - (SUST_B_3D_B32_TRAP Int64Regs:$s, + (SUST_B_3D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i64_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r), - (SUST_B_3D_B64_TRAP Int64Regs:$s, + (SUST_B_3D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B8_TRAP Int64Regs:$s, + (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B16_TRAP Int64Regs:$s, + (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g), - (SUST_B_3D_V2B32_TRAP Int64Regs:$s, + (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i64_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g), - (SUST_B_3D_V2B64_TRAP Int64Regs:$s, + (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B8_TRAP Int64Regs:$s, + (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B16_TRAP Int64Regs:$s, + (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_3D_V4B32_TRAP Int64Regs:$s, + (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -6873,248 +5381,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_trap // .zero variant def : Pat<(int_nvvm_sust_b_1d_i8_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i16_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_i64_zero Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; + (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_v2i8_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i16_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v2i64_zero Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_v4i8_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i16_zero Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_v4i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, + (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), - (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_i64_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), - (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), - (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_i64_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_v2i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v2i64_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_v4i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_v4i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_i64_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), - (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), - (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s, + (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, + (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -7123,77 +5631,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero def : Pat<(int_nvvm_sust_b_3d_i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B8_ZERO Int64Regs:$s, + (SUST_B_3D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_B_3D_B16_ZERO Int64Regs:$s, + (SUST_B_3D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r), - (SUST_B_3D_B32_ZERO Int64Regs:$s, + (SUST_B_3D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_i64_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r), - (SUST_B_3D_B64_ZERO Int64Regs:$s, + (SUST_B_3D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>; def : Pat<(int_nvvm_sust_b_3d_v2i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B8_ZERO Int64Regs:$s, + (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_B_3D_V2B16_ZERO Int64Regs:$s, + (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g), - (SUST_B_3D_V2B32_ZERO Int64Regs:$s, + (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v2i64_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g), - (SUST_B_3D_V2B64_ZERO Int64Regs:$s, + (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r, Int64Regs:$g)>; def : Pat<(int_nvvm_sust_b_3d_v4i8_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B8_ZERO Int64Regs:$s, + (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i16_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_B_3D_V4B16_ZERO Int64Regs:$s, + (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_b_3d_v4i32_zero Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_B_3D_V4B32_ZERO Int64Regs:$s, + (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -7202,207 +5710,207 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_zero def : Pat<(int_nvvm_sust_p_1d_i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), - (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), - (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, + (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_array_i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_array_i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), - (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_array_i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), - (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), - (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), - (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_array_i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_array_i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), - (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_array_i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), - (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), - (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), - (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s, + (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, + (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -7411,63 +5919,63 @@ def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap def : Pat<(int_nvvm_sust_p_3d_i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_P_3D_B8_TRAP Int64Regs:$s, + (SUST_P_3D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_3d_i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r), - (SUST_P_3D_B16_TRAP Int64Regs:$s, + (SUST_P_3D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>; def : Pat<(int_nvvm_sust_p_3d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r), - (SUST_P_3D_B32_TRAP Int64Regs:$s, + (SUST_P_3D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>; def : Pat<(int_nvvm_sust_p_3d_v2i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_P_3D_V2B8_TRAP Int64Regs:$s, + (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_3d_v2i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g), - (SUST_P_3D_V2B16_TRAP Int64Regs:$s, + (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g)>; def : Pat<(int_nvvm_sust_p_3d_v2i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g), - (SUST_P_3D_V2B32_TRAP Int64Regs:$s, + (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g)>; def : Pat<(int_nvvm_sust_p_3d_v4i8_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_3D_V4B8_TRAP Int64Regs:$s, + (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_3d_v4i16_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), - (SUST_P_3D_V4B16_TRAP Int64Regs:$s, + (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; def : Pat<(int_nvvm_sust_p_3d_v4i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), - (SUST_P_3D_V4B32_TRAP Int64Regs:$s, + (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; @@ -7578,6 +6086,7 @@ class WMMA_REGINFO<WMMA_REGS r, string op> !eq(ptx_elt_type, "bf16") : Int32Regs, !eq(ptx_elt_type, "tf32") : Int32Regs, !eq(ptx_elt_type, "s32") : Int32Regs, + !eq(ptx_elt_type, "b16") : Int32Regs, !eq(ptx_elt_type, "s8") : Int32Regs, !eq(ptx_elt_type, "u8") : Int32Regs, !eq(ptx_elt_type, "s4") : Int32Regs, @@ -7661,7 +6170,11 @@ class WMMA_REGINFO<WMMA_REGS r, string op> !eq(geom, "m16n8k64"), !eq(geom, "m8n8k128"), !eq(geom, "m16n8k128"), - !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70]); + !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70], + + !and(!eq(op,"ldmatrix"), + !eq(ptx_elt_type,"b16"), + !eq(geom, "m8n8")) : [hasSM75, hasPTX65]); // template DAGs for instruction inputs/output. dag Outs = !dag(outs, ptx_regs, reg_names); @@ -7910,6 +6423,44 @@ defset list<WMMA_INSTR> MMAs = { } // layout_a } // defset +// +// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16 +// +class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space, + DAGOperand SrcOp> + : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>, + Requires<Frag.Predicates> { + // Build PatFrag that only matches particular address space. + PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src), + !cond(!eq(Space, ".shared"): AS_match.shared, + true: AS_match.generic)>; + // Build AS-constrained pattern. + let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; + + let OutOperandList = Frag.Outs; + let InOperandList = !con(Args, (ins MmaCode:$ptx)); + let AsmString = "ldmatrix.sync.aligned." + # Frag.geom + # "." # Frag.frag + # !if(Transposed, ".trans", "") + # Space + # "." # Frag.ptx_elt_type + # " " # Frag.regstring # ", [$src];"; +} + +// Create all ldmatrix variants +defset list<WMMA_INSTR> LDMATRIXs = { + foreach transposed = [false, true] in { + foreach space = [".shared", ""] in { + foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { + foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in + if NVVM_LDMATRIX_SUPPORTED<frag>.ret then + def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space, + addr>; + } // addr + } // space + } // transposed +} // defset // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a // dag, so the ptx.version must be appended *after* foreach replaces 'ins' with @@ -7921,5 +6472,5 @@ class MMA_PAT<WMMA_INSTR wi> Requires<wi.Predicates>; // Build intrinsic->instruction patterns for all MMA instructions. -foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs) in +foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in def : MMA_PAT<mma>; |