about | summary | refs | log | tree | commit | diff
path: root/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td 6101
1 files changed, 2326 insertions, 3775 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index de4bf2ef3055..511cd875ac55 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1232,7 +1232,7 @@ multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
// has 2 operands, neg the second one
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
- Operand IMMType, list<Predicate> Pred> {
+ list<Predicate> Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
!strconcat(
"{{ \n\t",
@@ -1244,12 +1244,11 @@ multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
Requires<Pred>;
}
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
- string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
- list<Predicate> Pred = []> {
+ string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
- IntOp, IMMType, Pred> ;
+ IntOp, Pred> ;
defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
- IntOp, IMMType, Pred> ;
+ IntOp, Pred> ;
}
// has 3 operands
@@ -1357,21 +1356,21 @@ def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_sub_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
- atomic_load_sub_32_g, i32imm>;
+ atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
- atomic_load_sub_64_g, i64imm>;
+ atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
- atomic_load_sub_32_gen, i32imm>;
+ atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
- ".add", atomic_load_sub_32_gen, i32imm>;
+ ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
- atomic_load_sub_32_s, i32imm>;
+ atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
- atomic_load_sub_64_s, i64imm>;
+ atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
- atomic_load_sub_64_gen, i64imm>;
+ atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
- ".add", atomic_load_sub_64_gen, i64imm>;
+ ".add", atomic_load_sub_64_gen>;
// atom_swap
@@ -2465,2303 +2464,1563 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
// texmode_independent
let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
-def TEX_1D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], $lod;",
- []>;
-def TEX_1D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], $lod;",
- []>;
-def TEX_1D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], $lod;",
- []>;
-def TEX_1D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_1D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_1D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_1D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-
-def TEX_2D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_2D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_2D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_2D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+class TEX_1D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+
+multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_1D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_2D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_2D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_2D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_2D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
+ []>;
+
+multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_3D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_3D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_3D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_3D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_3D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_3D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+defm TEX_1D_F32_F32_LEVEL :
+ TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_F32_LEVEL :
+ TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_F32_LEVEL :
+ TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
+ " \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_CUBE_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_CUBE_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_CUBE_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_CUBE_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_CUBE_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_CUBE_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+defm TEX_1D_F32_F32_GRAD
+ : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_F32_GRAD
+ : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_F32_GRAD
+ : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
+ []>;
+
+multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_CUBE_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_CUBE_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_CUBE_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_CUBE_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_CUBE_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_CUBE_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
+defm TEX_1D_ARRAY_F32_F32
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_F32_S32
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_S32_S32
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_S32_F32
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_S32
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_U32_F32
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x\\}], $lod;",
+ []>;
+
+multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TLD4_R_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_G_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_B_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_A_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_R_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_G_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_B_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_A_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_R_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_G_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_B_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_A_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
+// tex.level.a1d instantiations for f32, s32 and u32 results with f32
+// coordinates. The s32 and u32 variants both use Int32Regs for the results;
+// they differ only in the instruction string.
+defm TEX_1D_ARRAY_F32_F32_LEVEL
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_S32_F32_LEVEL
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_F32_LEVEL
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Shared shape for the tex.grad.a1d instructions.
+//   inst    - mnemonic text placed at the start of the asm string.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of $x and of the $gradx / $grady operands.
+//   texsamp - dag supplying the leading $t and $s operands; it is joined with
+//             the remaining inputs via !con.
+// $l is always Int32Regs, independent of intype.
+// NOTE(review): $l is presumably the array layer index -- confirm.
+// The trailing [] is presumably the (empty) pattern list -- confirm against
+// the NVPTXInst declaration.
+class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x,
+ intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
+ " \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+// Expands TEX_1D_ARRAY_GRAD_base into four records, one for each combination
+// of register (Int64Regs) and immediate (i64imm) forms of $t and $s.
+// Suffix: first letter describes $t, second describes $s
+// (R = Int64Regs, I = i64imm).
+multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
}
+// tex.grad.a1d instantiations for f32, s32 and u32 results with f32
+// coordinates and gradients. s32 and u32 share Int32Regs for the results and
+// differ only in the instruction string.
+defm TEX_1D_ARRAY_F32_F32_GRAD
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_S32_F32_GRAD
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_F32_GRAD
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Shared shape for the plain tex.2d instructions.
+//   inst    - mnemonic text placed at the start of the asm string.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of the coordinates $x and $y.
+//   texsamp - dag supplying the leading $t and $s operands; joined with the
+//             coordinate operands via !con.
+// The trailing [] is presumably the (empty) pattern list -- confirm against
+// the NVPTXInst declaration.
+class TEX_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
+ []>;
+
+// Expands TEX_2D_base into four records, one for each combination of register
+// (Int64Regs) and immediate (i64imm) forms of $t and $s.
+// Suffix: first letter describes $t, second describes $s
+// (R = Int64Regs, I = i64imm).
+multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_2D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
+}
-// texmode_unified
-let IsTex = true, IsTexModeUnified = true in {
-// Texture fetch instructions using handles
-def TEX_UNIFIED_1D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
+// tex.2d instantiations. Record names follow TEX_2D_<result>_<coordinate>.
+// s32 and u32 results both use Int32Regs; the signedness appears only in the
+// instruction string.
+defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Shared shape for the tex.level.2d instructions.
+//   inst    - mnemonic text placed at the start of the asm string.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of $x, $y and $lod.
+//   texsamp - dag supplying the leading $t and $s operands; joined with the
+//             remaining inputs via !con.
+// $lod is printed as a bare operand after the bracketed address, not inside
+// braces.
+class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y\\}], $lod;",
+ []>;
+
+// Expands TEX_2D_LEVEL_base into four records, one for each combination of
+// register (Int64Regs) and immediate (i64imm) forms of $t and $s.
+// Suffix: first letter describes $t, second describes $s
+// (R = Int64Regs, I = i64imm).
+multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_1D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
+// tex.level.2d instantiations for f32, s32 and u32 results; all take f32
+// inputs. s32 and u32 share Int32Regs for the results.
+defm TEX_2D_F32_F32_LEVEL :
+ TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_S32_F32_LEVEL :
+ TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_F32_LEVEL :
+ TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Shared shape for the tex.grad.2d instructions.
+//   inst    - mnemonic text placed at the start of the asm string.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of $x, $y and the four gradient operands.
+//   texsamp - dag supplying the leading $t and $s operands; joined with the
+//             remaining inputs via !con.
+// The gradients are printed as two two-element groups:
+// {$gradx0, $gradx1} followed by {$grady0, $grady1}.
+class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+
+// Expands TEX_2D_GRAD_base into four records, one for each combination of
+// register (Int64Regs) and immediate (i64imm) forms of $t and $s.
+// Suffix: first letter describes $t, second describes $s
+// (R = Int64Regs, I = i64imm).
+multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_2D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+// tex.grad.2d instantiations for f32, s32 and u32 results; all take f32
+// inputs. s32 and u32 share Int32Regs for the results.
+defm TEX_2D_F32_F32_GRAD :
+ TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_S32_F32_GRAD :
+ TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_F32_GRAD :
+ TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Shared shape for the plain tex.a2d instructions.
+//   inst    - mnemonic text placed at the start of the asm string.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of the coordinates $x and $y.
+//   texsamp - dag supplying the leading $t and $s operands; joined with the
+//             remaining inputs via !con.
+// $l is always Int32Regs, independent of intype.
+// The asm string prints $y twice so the bracketed vector has four elements.
+// NOTE(review): the repeated $y is presumably padding for a required
+// four-element coordinate vector -- confirm against the PTX ISA.
+class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+// Expands TEX_2D_ARRAY_base into four records, one for each combination of
+// register (Int64Regs) and immediate (i64imm) forms of $t and $s.
+// Suffix: first letter describes $t, second describes $s
+// (R = Int64Regs, I = i64imm).
+multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_2D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+// tex.a2d instantiations. Record names follow
+// TEX_2D_ARRAY_<result>_<coordinate>. s32 and u32 results both use Int32Regs;
+// the signedness appears only in the instruction string.
+defm TEX_2D_ARRAY_F32_F32
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_F32_S32
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_S32_S32
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_S32_F32
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_S32
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_U32_F32
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Shared shape for the tex.level.a2d instructions.
+//   inst    - mnemonic text placed at the start of the asm string.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of $x, $y and $lod.
+//   texsamp - dag supplying the leading $t and $s operands; joined with the
+//             remaining inputs via !con.
+// $l is always Int32Regs, independent of intype.
+// The asm string prints $y twice so the bracketed vector has four elements.
+// NOTE(review): the repeated $y is presumably padding for a required
+// four-element coordinate vector -- confirm against the PTX ISA.
+class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+
+// Expands TEX_2D_ARRAY_LEVEL_base into four records, one for each combination
+// of register (Int64Regs) and immediate (i64imm) forms of $t and $s.
+// Suffix: first letter describes $t, second describes $s
+// (R = Int64Regs, I = i64imm).
+multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_3D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_3D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_UNIFIED_3D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_3D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_UNIFIED_3D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_3D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+// tex.level.a2d instantiations for f32, s32 and u32 results; all take f32
+// inputs. s32 and u32 share Int32Regs for the results.
+defm TEX_2D_ARRAY_F32_F32_LEVEL
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_S32_F32_LEVEL
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_F32_LEVEL
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Shared shape for the tex.grad.a2d instructions.
+//   inst    - mnemonic text placed at the start of the asm string.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of $x, $y and the four gradient operands.
+//   texsamp - dag supplying the leading $t and $s operands; joined with the
+//             remaining inputs via !con.
+// $l is always Int32Regs, independent of intype.
+// The asm string prints $y twice so the bracketed vector has four elements;
+// the gradients are printed as two two-element groups.
+// NOTE(review): the repeated $y is presumably padding for a required
+// four-element coordinate vector -- confirm against the PTX ISA.
+class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+
+// Expands TEX_2D_ARRAY_GRAD_base into four records, one for each combination
+// of register (Int64Regs) and immediate (i64imm) forms of $t and $s.
+// Suffix: first letter describes $t, second describes $s
+// (R = Int64Regs, I = i64imm).
+multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_CUBE_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+// tex.grad.a2d instantiations for f32, s32 and u32 results; all take f32
+// inputs. s32 and u32 share Int32Regs for the results.
+defm TEX_2D_ARRAY_F32_F32_GRAD
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_S32_F32_GRAD
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_F32_GRAD
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Shared shape for the plain tex.3d instructions.
+//   inst    - mnemonic text placed at the start of the asm string.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of the coordinates $x, $y and $z.
+//   texsamp - dag supplying the leading $t and $s operands; joined with the
+//             coordinate operands via !con.
+// The asm string prints $z twice so the bracketed vector has four elements.
+// NOTE(review): the repeated $z is presumably padding for a required
+// four-element coordinate vector -- confirm against the PTX ISA.
+class TEX_3D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+
+// Expands TEX_3D_base into four records, one for each combination of register
+// (Int64Regs) and immediate (i64imm) forms of $t and $s.
+// Suffix: first letter describes $t, second describes $s
+// (R = Int64Regs, I = i64imm).
+multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_3D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_3D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_3D_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_3D_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_CUBE_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
+// Instantiate TEX_3D once per result/coordinate type pairing. The record-name
+// suffix <RESULT>_<COORD> mirrors the last two type suffixes of the mnemonic;
+// the S32 and U32 result flavours both use Int32Regs and differ only in the
+// mnemonic string.
+defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a 3D fetch with an explicit $lod operand.
+//   inst    - mnemonic text pasted in front of the operand list.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of $x, $y, $z and $lod.
+//   texsamp - leading (ins ...) dag; it has to name $t and $s because the
+//             asm string refers to them (presumably texture and sampler).
+// !con appends the coordinate operands to texsamp. The asm string prints $z
+// twice, so three operands yield a four-entry coordinate vector.
+// NOTE(review): the trailing [] is presumably an empty pattern list - confirm
+// against the NVPTXInst definition.
+class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+
+// Expands TEX_3D_LEVEL_base into four records, one per operand-kind
+// combination of $t and $s. In the suffix the first letter describes $t and
+// the second $s: R = Int64Regs register, I = i64imm immediate.
+multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TLD4_UNIFIED_R_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_G_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_B_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_A_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_R_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_G_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_B_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_A_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_R_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_G_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_B_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_A_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
+// tex.level.3d instantiations: only f32 coordinates are defined here, with
+// f32, s32 and u32 results (the two integer flavours share Int32Regs).
+defm TEX_3D_F32_F32_LEVEL
+ : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_S32_F32_LEVEL
+ : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_F32_LEVEL
+ : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a 3D fetch that takes two gradient vectors.
+//   inst    - mnemonic text pasted in front of the operand list.
+//   outtype - register class of the four results $r, $g, $b, $a.
+//   intype  - register class of the coordinates and all six gradient operands.
+//   texsamp - leading (ins ...) dag; it has to name $t and $s because the
+//             asm string refers to them (presumably texture and sampler).
+// Each three-operand group ($x..$z, $gradx0..2, $grady0..2) is printed as a
+// four-entry vector by repeating its last operand in the asm string.
+class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+ intype :$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+
+// Expands TEX_3D_GRAD_base into four records covering every register /
+// immediate combination of $t and $s (suffix letters: first is $t, second is
+// $s; R = Int64Regs, I = i64imm).
+multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
}
+// tex.grad.3d instantiations: f32 coordinates/gradients with f32, s32 and u32
+// results (s32 and u32 both map to Int32Regs).
+defm TEX_3D_F32_F32_GRAD
+ : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_S32_F32_GRAD
+ : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_F32_GRAD
+ : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a cube fetch: four `outtype` results and three `intype`
+// coordinates appended (via !con) to the caller-supplied `texsamp` dag, which
+// must provide the $t and $s operands used by the asm string.
+// $z is printed twice so the coordinate vector has four entries.
+class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+
+// Expands TEX_CUBE_base into _RR/_RI/_IR/_II records. The two suffix letters
+// give the operand kind of $t and $s respectively: R = Int64Regs register,
+// I = i64imm immediate.
+multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
+// tex.cube instantiations: f32 coordinates with f32, s32 and u32 results
+// (both integer result flavours use Int32Regs).
+defm TEX_CUBE_F32_F32
+ : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_S32_F32
+ : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_U32_F32
+ : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a cube fetch with an explicit $lod operand. Inputs are the
+// caller-supplied `texsamp` dag (must name $t and $s) followed by $x, $y, $z
+// and $lod, all of class `intype`. The asm string repeats $z to form a
+// four-entry coordinate vector and prints $lod after the address.
+class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+
+// Expands TEX_CUBE_LEVEL_base into four records, one per register/immediate
+// combination of $t (first suffix letter) and $s (second suffix letter);
+// R = Int64Regs, I = i64imm.
+multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-//=== Surface load instructions
-// .clamp variant
-let IsSuld = true in {
-def SULD_1D_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+// tex.level.cube instantiations: f32 coordinates with f32, s32 and u32
+// results (s32 and u32 share Int32Regs).
+defm TEX_CUBE_F32_F32_LEVEL
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_S32_F32_LEVEL
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_U32_F32_LEVEL
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a cube-array fetch. After the caller-supplied `texsamp`
+// dag (must name $t and $s) come $l, which is always Int32Regs regardless of
+// `intype` (presumably the array layer index), and the `intype` coordinates
+// $x, $y, $z. The four operands fill the coordinate vector without repeats.
+class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $z\\}];",
+ []>;
+
+// Expands TEX_CUBE_ARRAY_base into _RR/_RI/_IR/_II records; the suffix letters
+// give the operand kind of $t then $s (R = Int64Regs, I = i64imm).
+multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def SULD_1D_ARRAY_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+// tex.acube instantiations: f32 coordinates with f32, s32 and u32 results
+// (both integer result flavours use Int32Regs).
+defm TEX_CUBE_ARRAY_F32_F32
+ : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_S32_F32
+ : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_U32_F32
+ : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a cube-array fetch with an explicit $lod operand. Inputs:
+// the caller-supplied `texsamp` dag (must name $t and $s), an Int32Regs $l
+// (presumably the array layer index), `intype` coordinates $x, $y, $z, and an
+// `intype` $lod that the asm string prints after the address.
+class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$z, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+
+// Expands TEX_CUBE_ARRAY_LEVEL_base into four records, one per
+// register/immediate combination of $t (first suffix letter) and $s (second);
+// R = Int64Regs, I = i64imm.
+multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def SULD_2D_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+// tex.level.acube instantiations: f32 coordinates with f32, s32 and u32
+// results (s32 and u32 share Int32Regs).
+defm TEX_CUBE_ARRAY_F32_F32_LEVEL
+ : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_S32_F32_LEVEL
+ : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_U32_F32_LEVEL
+ : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+// Common shape of a 2D tld4 instruction: four `outtype` results named
+// $v0..$v3 and two `intype` coordinates $x, $y appended (via !con) to the
+// caller-supplied `texsamp` dag, which must provide the $t and $s operands
+// used by the asm string.
+class TLD4_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$v0, outtype:$v1,
+ outtype:$v2, outtype:$v3),
+ !con(texsamp, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
+ []>;
+
+// Expands TLD4_2D_base into _RR/_RI/_IR/_II records; the suffix letters give
+// the operand kind of $t then $s (R = Int64Regs register, I = i64imm
+// immediate).
+multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TLD4_2D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TLD4_2D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TLD4_2D_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TLD4_2D_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def SULD_2D_ARRAY_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+// tld4 instantiations, grouped by result type (f32, s32, u32). Within each
+// group there is one defm per selector letter in the mnemonic (r, g, b, a).
+// Coordinates are always Float32Regs; s32 and u32 results share Int32Regs.
+defm TLD4_R_2D_F32_F32
+ : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_G_2D_F32_F32
+ : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_B_2D_F32_F32
+ : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_A_2D_F32_F32
+ : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+
+defm TLD4_R_2D_S32_F32
+ : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_G_2D_S32_F32
+ : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_B_2D_S32_F32
+ : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_A_2D_S32_F32
+ : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+
+defm TLD4_R_2D_U32_F32
+ : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_G_2D_U32_F32
+ : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_B_2D_U32_F32
+ : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_A_2D_U32_F32
+ : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
-def SULD_3D_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
}
-let IsSuld = 2 in {
-def SULD_1D_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
+// texmode_unified
+let IsTex = true, IsTexModeUnified = true in {
+// Texture fetch instructions using handles
-def SULD_2D_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
+// Common shape of a unified-mode 1D fetch. Unlike the $t/$s forms, the
+// address here uses a single handle operand: `tex` must be an (ins ...) dag
+// naming $t, and the asm string has no $s. One `intype` coordinate $x follows.
+//   inst    - mnemonic text pasted in front of the operand list.
+//   outtype - register class of the four results $r, $g, $b, $a.
+class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ []>;
+
+// Expands TEX_UNIFIED_1D_base into two records: _R takes $t as an Int64Regs
+// register, _I takes it as an i64imm immediate.
+multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+// Unified-mode tex.1d instantiations, one per result/coordinate type pairing.
+// The <RESULT>_<COORD> name suffix mirrors the mnemonic's type suffixes; s32
+// and u32 results both use Int32Regs.
+defm TEX_UNIFIED_1D_F32_S32
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_F32_F32
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_S32
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_S32_F32
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_S32
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_U32_F32
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a unified-mode 1D fetch with an explicit $lod operand.
+// `tex` must be an (ins ...) dag naming $t; $x and $lod (both `intype`) are
+// appended with !con, and $lod is printed after the address.
+class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
+ []>;
+
+// Expands TEX_UNIFIED_1D_LEVEL_base into _R ($t in an Int64Regs register) and
+// _I ($t as an i64imm immediate).
+multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_3D_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+// Unified-mode tex.level.1d instantiations: f32 coordinates with f32, s32 and
+// u32 results (s32 and u32 share Int32Regs).
+defm TEX_UNIFIED_1D_F32_F32_LEVEL
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_F32_LEVEL
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_F32_LEVEL
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a unified-mode 1D fetch with gradients. `tex` must be an
+// (ins ...) dag naming $t; $x, $gradx and $grady (all `intype`) are appended
+// with !con. Each gradient is printed as its own one-entry brace group.
+class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+// Expands TEX_UNIFIED_1D_GRAD_base into _R ($t in an Int64Regs register) and
+// _I ($t as an i64imm immediate).
+multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
-let IsSuld = 3 in {
-def SULD_1D_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
+// Unified-mode tex.grad.1d instantiations: f32 coordinates/gradients with
+// f32, s32 and u32 results (s32 and u32 share Int32Regs).
+defm TEX_UNIFIED_1D_F32_F32_GRAD
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_F32_GRAD
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_F32_GRAD
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a unified-mode 1D-array fetch. `tex` must be an (ins ...)
+// dag naming $t. It is followed by $l, which is always Int32Regs regardless
+// of `intype` (presumably the array layer index), and one `intype`
+// coordinate $x.
+class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
+ []>;
+
+// Expands TEX_UNIFIED_1D_ARRAY_base into _R ($t in an Int64Regs register) and
+// _I ($t as an i64imm immediate).
+multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_1D_ARRAY_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
+// Unified-mode tex.a1d instantiations, one per result/coordinate type
+// pairing. The <RESULT>_<COORD> name suffix mirrors the mnemonic's type
+// suffixes; s32 and u32 results both use Int32Regs.
+defm TEX_UNIFIED_1D_ARRAY_F32_S32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_S32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_S32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a unified-mode 1D-array fetch with an explicit $lod
+// operand. `tex` must be an (ins ...) dag naming $t; it is followed by an
+// Int32Regs $l (presumably the array layer index) and `intype` $x and $lod.
+// $lod is printed after the address.
+class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
+ []>;
+
+// Expands TEX_UNIFIED_1D_ARRAY_LEVEL_base into _R ($t in an Int64Regs
+// register) and _I ($t as an i64imm immediate).
+multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
+// Unified-mode tex.level.a1d instantiations: f32 coordinates with f32, s32
+// and u32 results (s32 and u32 share Int32Regs).
+defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
+ : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
+ : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
+ : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+// Common shape of a unified-mode 1D-array fetch with gradients. `tex` must be
+// an (ins ...) dag naming $t; it is followed by an Int32Regs $l (presumably
+// the array layer index) and `intype` $x, $gradx and $grady. Each gradient
+// is printed as its own one-entry brace group.
+class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x,
+ intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+// Expands TEX_UNIFIED_1D_ARRAY_GRAD_base into _R ($t in an Int64Regs
+// register) and _I ($t as an i64imm immediate).
+multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+// Unified-mode tex.grad.a1d instantiations: f32 coordinates/gradients with
+// f32, s32 and u32 results (s32 and u32 share Int32Regs).
+defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
+ : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
+ : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
+ : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+// Common shape of a unified-mode 2D fetch: four `outtype` results and two
+// `intype` coordinates $x, $y appended (via !con) to the caller-supplied
+// `tex` dag, which must name the $t operand used by the asm string.
+class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
+ []>;
+
+// Expands TEX_UNIFIED_2D_base into _R ($t in an Int64Regs register) and _I
+// ($t as an i64imm immediate).
+multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
+// Unified-mode tex.2d instantiations, one per result/coordinate type pairing.
+// The <RESULT>_<COORD> name suffix mirrors the mnemonic's type suffixes; s32
+// and u32 results both use Int32Regs.
+defm TEX_UNIFIED_2D_F32_S32
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_F32_F32
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_S32
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_S32_F32
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_S32
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_U32_F32
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a unified-mode 2D fetch with an explicit $lod operand.
+// `tex` must be an (ins ...) dag naming $t; $x, $y and $lod (all `intype`)
+// are appended with !con, and $lod is printed after the address.
+class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
+ []>;
+
+// Expands TEX_UNIFIED_2D_LEVEL_base into _R ($t in an Int64Regs register) and
+// _I ($t as an i64imm immediate).
+multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_3D_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
+// Unified-mode tex.level.2d instantiations: f32 coordinates with f32, s32 and
+// u32 results (s32 and u32 share Int32Regs).
+defm TEX_UNIFIED_2D_F32_F32_LEVEL
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_F32_LEVEL
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_F32_LEVEL
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+// Common shape of a unified-mode 2D fetch with gradients. `tex` must be an
+// (ins ...) dag naming $t; the coordinates $x, $y and the gradient operands
+// $gradx0/$gradx1 and $grady0/$grady1 (all `intype`) are appended with !con.
+// Each gradient pair is printed as its own two-entry brace group.
+class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+// Expands TEX_UNIFIED_2D_GRAD_base into _R ($t in an Int64Regs register) and
+// _I ($t as an i64imm immediate).
+multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
+defm TEX_UNIFIED_2D_F32_F32_GRAD
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_F32_GRAD
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_F32_GRAD
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-// .trap variant
-let IsSuld = true in {
-def SULD_1D_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_S32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_S32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_S32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_1D_ARRAY_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
+ : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
+ : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
+ : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
+ : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
+ : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
+ : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_3D_F32_S32
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_F32_F32
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_S32
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_S32_F32
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_S32
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_U32_F32
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_3D_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_3D_F32_F32_LEVEL
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_F32_LEVEL
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_F32_LEVEL
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
-let IsSuld = 2 in {
-def SULD_1D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_UNIFIED_3D_F32_F32_GRAD
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_F32_GRAD
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_F32_GRAD
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_1D_ARRAY_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_F32_F32
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
+ []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_3D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$v0, outtype:$v1,
+ outtype:$v2, outtype:$v3),
+ !con(tex, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
+ []>;
+multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
-let IsSuld = 3 in {
-def SULD_1D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
+defm TLD4_UNIFIED_R_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+
+defm TLD4_UNIFIED_R_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+
+defm TLD4_UNIFIED_R_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
-def SULD_1D_ARRAY_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
+}
-def SULD_2D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+//=== Surface load instructions
-def SULD_3D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
+let IsSuld = true in {
+
+class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$x)),
+ inst # " \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}
-// .zero variant
-let IsSuld = true in {
-def SULD_1D_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
+defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
+defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
+defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
+
+defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
+defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
+defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
+defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
+
+defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
+defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
+defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
+defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
+
+class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_1D_ARRAY_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm SULD_1D_ARRAY_I8_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_I16_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_I32_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
+defm SULD_1D_ARRAY_I64_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
+
+defm SULD_1D_ARRAY_I8_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_I16_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_I32_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
+defm SULD_1D_ARRAY_I64_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
+
+defm SULD_1D_ARRAY_I8_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_I16_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_I32_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
+defm SULD_1D_ARRAY_I64_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
+
+class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
+defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
+defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
+defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
+
+defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
+defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
+defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
+defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
+
+defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
+defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
+defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
+defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
+
+class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_ARRAY_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
+defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
+
+defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
+defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
+
+defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
+defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
+
+class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_3D_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
+defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
+defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
+defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
+
+defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
+defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
+defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
+defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
+
+defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
+defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
+defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
+defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
}
let IsSuld = 2 in {
-def SULD_1D_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
// SULD_1D_V2_base: surface-load record with two result registers ($r, $g) of
// class `outtype`. Inputs are the `surf` dag (must bind $s, which the asm
// string references) followed by one Int32Regs coordinate $x. The pattern list
// is empty.
//
// SULD_1D_V2: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$x)),
+ inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
// Two-element 1D surface loads: SULD_1D_V2 instantiated for b8/b16/b32/b64
// under each of the .clamp, .trap and .zero suffixes. b8 and b16 both use
// Int16Regs for the result registers.
+defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
+defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
+defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
+defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
+defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
+defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
+defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
+
+defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
+defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
+defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
+defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
+
// SULD_1D_ARRAY_V2_base: surface-load record with two result registers
// ($r, $g) of class `outtype`. Inputs are the `surf` dag (must bind $s)
// followed by Int32Regs operands $l and $x, printed in that order inside the
// coordinate braces. The pattern list is empty.
// NOTE(review): $l looks like the array layer index; confirm against callers.
//
// SULD_1D_ARRAY_V2: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_ARRAY_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
// Two-element 1D-array surface loads: SULD_1D_ARRAY_V2 instantiated for
// b8/b16/b32/b64 under each of the .clamp, .trap and .zero suffixes. b8 and
// b16 both use Int16Regs for the result registers.
+defm SULD_1D_ARRAY_V2I8_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_1D_ARRAY_V2I8_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
+
+defm SULD_1D_ARRAY_V2I8_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
+
// SULD_2D_V2_base: surface-load record with two result registers ($r, $g) of
// class `outtype`. Inputs are the `surf` dag (must bind $s) followed by
// Int32Regs coordinates $x and $y. The pattern list is empty.
//
// SULD_2D_V2: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
// Two-element 2D surface loads: SULD_2D_V2 instantiated for b8/b16/b32/b64
// under each of the .clamp, .trap and .zero suffixes. b8 and b16 both use
// Int16Regs for the result registers.
+defm SULD_2D_V2I8_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
+defm SULD_2D_V2I16_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
+defm SULD_2D_V2I32_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
+defm SULD_2D_V2I64_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_2D_V2I8_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
+defm SULD_2D_V2I16_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
+defm SULD_2D_V2I32_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
+defm SULD_2D_V2I64_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
+
+defm SULD_2D_V2I8_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
+defm SULD_2D_V2I16_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
+defm SULD_2D_V2I32_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
+defm SULD_2D_V2I64_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
+
// SULD_2D_ARRAY_V2_base: surface-load record with two result registers
// ($r, $g) of class `outtype`. Inputs are the `surf` dag (must bind $s)
// followed by Int32Regs operands $l, $x, $y. The pattern list is empty.
// NOTE(review): $y is emitted twice in the coordinate vector, presumably to
// fill a fourth slot; confirm against the PTX ISA `suld` description.
//
// SULD_2D_ARRAY_V2: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
// Two-element 2D-array surface loads: SULD_2D_ARRAY_V2 instantiated for
// b8/b16/b32/b64 under each of the .clamp, .trap and .zero suffixes. b8 and
// b16 both use Int16Regs for the result registers.
+defm SULD_2D_ARRAY_V2I8_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_2D_ARRAY_V2I8_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
+
+defm SULD_2D_ARRAY_V2I8_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
+
// SULD_3D_V2_base: surface-load record with two result registers ($r, $g) of
// class `outtype`. Inputs are the `surf` dag (must bind $s) followed by
// Int32Regs coordinates $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is emitted twice in the coordinate vector, presumably to
// fill a fourth slot; confirm against the PTX ISA `suld` description.
//
// SULD_3D_V2: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
// Two-element 3D surface loads: SULD_3D_V2 instantiated for b8/b16/b32/b64
// under each of the .clamp, .trap and .zero suffixes. b8 and b16 both use
// Int16Regs for the result registers.
+defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
+defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
+defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
+defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
+defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
+defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
+defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
+
+defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
+defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
+defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
+defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
-def SULD_3D_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
}
let IsSuld = 3 in {
-def SULD_1D_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
// SULD_1D_V4_base: surface-load record with four result registers
// ($r, $g, $b, $a) of class `outtype`. Inputs are the `surf` dag (must bind
// $s, which the asm string references) followed by one Int32Regs coordinate
// $x. The pattern list is empty.
//
// SULD_1D_V4: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$x)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
// Four-element 1D surface loads: SULD_1D_V4 instantiated for b8/b16/b32 only
// (this group defines no b64 variant) under each of the .clamp, .trap and
// .zero suffixes. b8 and b16 both use Int16Regs for the result registers.
+defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
+defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
+defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
+defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
+defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
+
+defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
+defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
+defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
+
// SULD_1D_ARRAY_V4_base: surface-load record with four result registers
// ($r, $g, $b, $a) of class `outtype`. Inputs are the `surf` dag (must bind
// $s) followed by Int32Regs operands $l and $x, printed in that order inside
// the coordinate braces. The pattern list is empty.
// NOTE(review): $l looks like the array layer index; confirm against callers.
//
// SULD_1D_ARRAY_V4: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
+ []>;
+multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_ARRAY_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
// Four-element 1D-array surface loads: SULD_1D_ARRAY_V4 instantiated for
// b8/b16/b32 only (no b64 variant in this group) under each of the .clamp,
// .trap and .zero suffixes. b8 and b16 both use Int16Regs for the results.
+defm SULD_1D_ARRAY_V4I8_CLAMP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_CLAMP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_CLAMP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_1D_ARRAY_V4I8_TRAP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_TRAP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_TRAP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
+
+defm SULD_1D_ARRAY_V4I8_ZERO
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_ZERO
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_ZERO
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
+
// SULD_2D_V4_base: surface-load record with four result registers
// ($r, $g, $b, $a) of class `outtype`. Inputs are the `surf` dag (must bind
// $s) followed by Int32Regs coordinates $x and $y. The pattern list is empty.
//
// SULD_2D_V4: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
// Four-element 2D surface loads: SULD_2D_V4 instantiated for b8/b16/b32 only
// (no b64 variant in this group) under each of the .clamp, .trap and .zero
// suffixes. b8 and b16 both use Int16Regs for the result registers.
+defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
+defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
+defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
+defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
+defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
+
+defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
+defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
+defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
+
// SULD_2D_ARRAY_V4_base: surface-load record with four result registers
// ($r, $g, $b, $a) of class `outtype`. Inputs are the `surf` dag (must bind
// $s) followed by Int32Regs operands $l, $x, $y. The pattern list is empty.
// NOTE(review): $y is emitted twice in the coordinate vector, presumably to
// fill a fourth slot; confirm against the PTX ISA `suld` description.
//
// SULD_2D_ARRAY_V4: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+
// Four-element 2D-array surface loads: SULD_2D_ARRAY_V4 instantiated for
// b8/b16/b32 only (no b64 variant in this group) under each of the .clamp,
// .trap and .zero suffixes. b8 and b16 both use Int16Regs for the results.
+defm SULD_2D_ARRAY_V4I8_CLAMP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_CLAMP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_CLAMP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_2D_ARRAY_V4I8_TRAP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_TRAP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_TRAP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
+
+defm SULD_2D_ARRAY_V4I8_ZERO
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_ZERO
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_ZERO
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
+
// SULD_3D_V4_base: surface-load record with four result registers
// ($r, $g, $b, $a) of class `outtype`. Inputs are the `surf` dag (must bind
// $s) followed by Int32Regs coordinates $x, $y, $z. The pattern list is empty.
// NOTE(review): $z is emitted twice in the coordinate vector, presumably to
// fill a fourth slot; confirm against the PTX ISA `suld` description.
//
// SULD_3D_V4: emits `_R` (handle $s in Int64Regs) and `_I` (handle $s as
// i64imm) records sharing the same asm string.
+class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+
// Four-element 3D surface loads: SULD_3D_V4 instantiated for b8/b16/b32 only
// (no b64 variant in this group) under each of the .clamp, .trap and .zero
// suffixes. b8 and b16 both use Int16Regs for the result registers.
+defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
+defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
+defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
+defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
+defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
+
+defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
+defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
+defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
-def SULD_3D_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
}
//-----------------------------------
@@ -4769,56 +4028,88 @@ def SULD_3D_V4I32_ZERO
//-----------------------------------
// txq.* query instructions, all defined with IsSurfTexQuery = true.
// This hunk renames each existing record to `<NAME>_R` (handle $a in
// Int64Regs) and adds a sibling `<NAME>_I` that takes the handle as i64imm.
// Both forms of a query print the same asm string, write a 32-bit result to
// $d, and carry an empty pattern list.
let IsSurfTexQuery = true in {
-def TXQ_CHANNEL_ORDER
+def TXQ_CHANNEL_ORDER_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "txq.channel_order.b32 \t$d, [$a];",
 []>;
-def TXQ_CHANNEL_DATA_TYPE
+def TXQ_CHANNEL_ORDER_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.channel_order.b32 \t$d, [$a];",
+ []>;
+def TXQ_CHANNEL_DATA_TYPE_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "txq.channel_data_type.b32 \t$d, [$a];",
 []>;
-def TXQ_WIDTH
+def TXQ_CHANNEL_DATA_TYPE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def TXQ_WIDTH_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "txq.width.b32 \t$d, [$a];",
 []>;
-def TXQ_HEIGHT
+def TXQ_WIDTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.width.b32 \t$d, [$a];",
+ []>;
+def TXQ_HEIGHT_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "txq.height.b32 \t$d, [$a];",
 []>;
-def TXQ_DEPTH
+def TXQ_HEIGHT_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.height.b32 \t$d, [$a];",
+ []>;
+def TXQ_DEPTH_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "txq.depth.b32 \t$d, [$a];",
 []>;
-def TXQ_ARRAY_SIZE
+def TXQ_DEPTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.depth.b32 \t$d, [$a];",
+ []>;
+def TXQ_ARRAY_SIZE_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "txq.array_size.b32 \t$d, [$a];",
 []>;
-def TXQ_NUM_SAMPLES
+def TXQ_ARRAY_SIZE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.array_size.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_SAMPLES_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "txq.num_samples.b32 \t$d, [$a];",
 []>;
-def TXQ_NUM_MIPMAP_LEVELS
+def TXQ_NUM_SAMPLES_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.num_samples.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_MIPMAP_LEVELS_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "txq.num_mipmap_levels.b32 \t$d, [$a];",
 []>;
+def TXQ_NUM_MIPMAP_LEVELS_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.num_mipmap_levels.b32 \t$d, [$a];",
+ []>;
 }
// Selection patterns for the int_nvvm_txq_* intrinsics. Each intrinsic with an
// Int64Regs operand is mapped to the register-handle (`_R`) form of the
// matching TXQ instruction; the hunk only retargets the patterns to the
// renamed records. No pattern in this group selects an `_I` form.
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
- (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
+ (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
- (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+ (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a),
- (TXQ_WIDTH Int64Regs:$a)>;
+ (TXQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_height Int64Regs:$a),
- (TXQ_HEIGHT Int64Regs:$a)>;
+ (TXQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
- (TXQ_DEPTH Int64Regs:$a)>;
+ (TXQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
- (TXQ_ARRAY_SIZE Int64Regs:$a)>;
+ (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
- (TXQ_NUM_SAMPLES Int64Regs:$a)>;
+ (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
- (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
+ (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
//-----------------------------------
@@ -4826,44 +4117,68 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
//-----------------------------------
// suq.* query instructions, all defined with IsSurfTexQuery = true.
// This hunk renames each existing record to `<NAME>_R` (handle $a in
// Int64Regs) and adds a sibling `<NAME>_I` that takes the handle as i64imm.
// Both forms of a query print the same asm string, write a 32-bit result to
// $d, and carry an empty pattern list.
let IsSurfTexQuery = true in {
-def SUQ_CHANNEL_ORDER
+def SUQ_CHANNEL_ORDER_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "suq.channel_order.b32 \t$d, [$a];",
 []>;
-def SUQ_CHANNEL_DATA_TYPE
+def SUQ_CHANNEL_ORDER_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.channel_order.b32 \t$d, [$a];",
+ []>;
+def SUQ_CHANNEL_DATA_TYPE_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "suq.channel_data_type.b32 \t$d, [$a];",
 []>;
-def SUQ_WIDTH
+def SUQ_CHANNEL_DATA_TYPE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def SUQ_WIDTH_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "suq.width.b32 \t$d, [$a];",
 []>;
-def SUQ_HEIGHT
+def SUQ_WIDTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.width.b32 \t$d, [$a];",
+ []>;
+def SUQ_HEIGHT_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "suq.height.b32 \t$d, [$a];",
 []>;
-def SUQ_DEPTH
+def SUQ_HEIGHT_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.height.b32 \t$d, [$a];",
+ []>;
+def SUQ_DEPTH_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "suq.depth.b32 \t$d, [$a];",
 []>;
-def SUQ_ARRAY_SIZE
+def SUQ_DEPTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.depth.b32 \t$d, [$a];",
+ []>;
+def SUQ_ARRAY_SIZE_R
 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
 "suq.array_size.b32 \t$d, [$a];",
 []>;
+def SUQ_ARRAY_SIZE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.array_size.b32 \t$d, [$a];",
+ []>;
 }
// Selection patterns for the int_nvvm_suq_* intrinsics. Each intrinsic with an
// Int64Regs operand is mapped to the register-handle (`_R`) form of the
// matching SUQ instruction; the hunk only retargets the patterns to the
// renamed records. No pattern in this group selects an `_I` form.
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
- (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
+ (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
- (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+ (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_width Int64Regs:$a),
- (SUQ_WIDTH Int64Regs:$a)>;
+ (SUQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_height Int64Regs:$a),
- (SUQ_HEIGHT Int64Regs:$a)>;
+ (SUQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
- (SUQ_DEPTH Int64Regs:$a)>;
+ (SUQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
- (SUQ_ARRAY_SIZE Int64Regs:$a)>;
+ (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
//===- Handle Query -------------------------------------------------------===//
@@ -4885,1329 +4200,522 @@ def ISTYPEP_TEXTURE
//===- Surface Stores -----------------------------------------------------===//
let IsSust = true in {
-// Unformatted
-// .clamp variant
-def SUST_B_1D_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_1D_ARRAY_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
- "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_ARRAY_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r),
- "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_3D_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r),
- "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-// .trap variant
-def SUST_B_1D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_1D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
- "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r),
- "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_3D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r),
- "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-// .zero variant
-def SUST_B_1D_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
+class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf> // 1D sust form: one coordinate ($x) and one stored value ($r) of class intype.
+ : NVPTXInst<(outs), // No result operands.
+ !con(surf, (ins Int32Regs:$x, intype:$r)), // !con appends $x and $r to the caller-supplied surf dag, which must define $s.
+ inst # " \t[$s, \\{$x\\}], \\{$r\\};", // Asm text: the inst mnemonic, then the [$s, {$x}] address and the {$r} data vector.
+ []>; // Empty pattern list.
+multiclass SUST_1D<string inst, NVPTXRegClass intype> { // Each defm of this multiclass defines two records, suffixed _R and _I.
+ def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>; // _R: $s is an Int64Regs register operand.
+ def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>; // _I: $s is an i64imm immediate operand.
+}
-def SUST_B_1D_ARRAY_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
- "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>; // sust.b .clamp variant; b8 data is passed in Int16Regs, same as b16.
+defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
+defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
+defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>; // sust.b .trap variant
+defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
+defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
+defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>; // sust.b .zero variant
+defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
+defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
+defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>; // sust.p form: only b8/b16/b32 with .trap are defined here.
+defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
+defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
+
+class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf> // 1D sust form storing a two-element vector ($r, $g) of class intype.
+ : NVPTXInst<(outs), // No result operands.
+ !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)), // !con appends $x, $r and $g to the caller-supplied surf dag, which must define $s.
+ inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};", // Asm text: the inst mnemonic, then the [$s, {$x}] address and the {$r, $g} data vector.
+ []>; // Empty pattern list.
+multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> { // Each defm of this multiclass defines two records, suffixed _R and _I.
+ def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>; // _R: $s is an Int64Regs register operand.
+ def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>; // _I: $s is an i64imm immediate operand.
+}
+defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>; // sust.b .clamp variant; b8 data is passed in Int16Regs, same as b16.
+defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>; // sust.b .trap variant
+defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
+defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
+defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>; // sust.b .zero variant
+defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
+defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
+defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>; // sust.p form: only b8/b16/b32 with .trap are defined here.
+defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
+defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
+
+class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf> // 1D sust form storing a four-element vector ($r, $g, $b, $a) of class intype.
+ : NVPTXInst<(outs), // No result operands.
+ !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g, // !con appends $x and the four data operands to the caller-supplied surf dag, which must define $s.
+ intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", // Asm text: the inst mnemonic, then the [$s, {$x}] address and the {$r, $g, $b, $a} data vector.
+ []>; // Empty pattern list.
+multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> { // Each defm of this multiclass defines two records, suffixed _R and _I.
+ def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>; // _R: $s is an Int64Regs register operand.
+ def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>; // _I: $s is an i64imm immediate operand.
+}
-def SUST_B_2D_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>; // sust.b .clamp variant; no b64 entry is defined for the v4 forms in this group.
+defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; // sust.b .trap variant
+defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
+defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; // sust.b .zero variant
+defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
+defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; // sust.p form: only .trap is defined here.
+defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
+defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> // a1d sust form: coordinates are ($idx, $x); stores one value ($r) of class intype.
+ : NVPTXInst<(outs), // No result operands.
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), // !con appends $idx, $x and $r to the caller-supplied surf dag, which must define $s.
+ inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", // Asm text: the inst mnemonic, then the [$s, {$idx, $x}] address and the {$r} data vector.
+ []>; // Empty pattern list.
+multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> { // Each defm of this multiclass defines two records, suffixed _R and _I.
+ def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; // _R: $s is an Int64Regs register operand.
+ def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>; // _I: $s is an i64imm immediate operand.
+}
+defm SUST_B_1D_ARRAY_B8_CLAMP // sust.b .clamp variant; b8 data is passed in Int16Regs, same as b16.
+ : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_B8_TRAP // sust.b .trap variant
+ : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_B8_ZERO // sust.b .zero variant
+ : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_ARRAY_B8_TRAP // sust.p form: only b8/b16/b32 with .trap are defined here.
+ : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_B16_TRAP
+ : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_B32_TRAP
+ : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> // a1d sust form: coordinates are ($idx, $x); stores a two-element vector ($r, $g).
+ : NVPTXInst<(outs), // No result operands.
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, // !con appends $idx, $x, $r and $g to the caller-supplied surf dag, which must define $s.
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", // Asm text: the inst mnemonic, then the [$s, {$idx, $x}] address and the {$r, $g} data vector.
+ []>; // Empty pattern list.
+multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> { // Each defm of this multiclass defines two records, suffixed _R and _I.
+ def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; // _R: $s is an Int64Regs register operand.
+ def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; // _I: $s is an i64imm immediate operand.
+}
-def SUST_B_2D_ARRAY_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r),
- "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+// Instantiations of SUST_1D_ARRAY_V2; each defm yields <name>_R and
+// <name>_I. The b8 and b16 forms both use Int16Regs for the data operands.
+// sust.b is instantiated for .clamp, .trap and .zero with b8/b16/b32/b64;
+// sust.p is instantiated only for .trap with b8/b16/b32.
+defm SUST_B_1D_ARRAY_V2B8_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_V2B8_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_V2B8_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_ARRAY_V2B8_TRAP
+ : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V2B16_TRAP
+ : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V2B32_TRAP
+ : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
+
+// Base record shared by the sust.*.a1d.v4.* instantiations of
+// SUST_1D_ARRAY_V4. It has no outputs; its inputs are the caller-supplied
+// `surf` dag joined via !con with $idx and $x (Int32Regs) and four data
+// operands $r, $g, $b, $a of class `intype`. `surf` must name its operand
+// $s, because the assembly string refers to $s. The pattern list is empty.
+class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
+}
+// Instantiations of SUST_1D_ARRAY_V4; each defm yields <name>_R and
+// <name>_I. The b8 and b16 forms both use Int16Regs for the data operands.
+// Only b8/b16/b32 are instantiated here (no b64 v4 form), and sust.p appears
+// only with .trap.
+defm SUST_B_1D_ARRAY_V4B8_CLAMP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_CLAMP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_CLAMP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_1D_ARRAY_V4B8_TRAP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_TRAP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_TRAP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_1D_ARRAY_V4B8_ZERO
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_ZERO
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_ZERO
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_1D_ARRAY_V4B8_TRAP
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V4B16_TRAP
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V4B32_TRAP
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
+
+// Base record shared by the single-value sust.*.2d.* instantiations of
+// SUST_2D. It has no outputs; its inputs are the caller-supplied `surf` dag
+// joined via !con with $x and $y (Int32Regs) and one data operand $r of
+// class `intype`. `surf` must name its operand $s, because the assembly
+// string refers to $s. The pattern list is empty.
+class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
+ inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_2D<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_B_3D_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r),
- "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+// Instantiations of SUST_2D; each defm yields <name>_R and <name>_I. The b8
+// and b16 forms both use Int16Regs for the data operand. sust.b is
+// instantiated for .clamp, .trap and .zero with b8/b16/b32/b64; sust.p is
+// instantiated only for .trap with b8/b16/b32.
+defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
+defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
+defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
+defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
+defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
+defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
+defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
+defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
+defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
+defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
+defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
+defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
+
+// Base record shared by the sust.*.2d.v2.* instantiations of SUST_2D_V2.
+// It has no outputs; its inputs are the caller-supplied `surf` dag joined
+// via !con with $x and $y (Int32Regs) and two data operands $r and $g of
+// class `intype`. `surf` must name its operand $s, because the assembly
+// string refers to $s. The pattern list is empty.
+class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
+}
+// Instantiations of SUST_2D_V2; each defm yields <name>_R and <name>_I. The
+// b8 and b16 forms both use Int16Regs for the data operands. sust.b is
+// instantiated for .clamp, .trap and .zero with b8/b16/b32/b64; sust.p is
+// instantiated only for .trap with b8/b16/b32.
+defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
+defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
+defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
+defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
+defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
+defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
+defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
+defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
+defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
+
+// Base record shared by the sust.*.2d.v4.* instantiations of SUST_2D_V4.
+// It has no outputs; its inputs are the caller-supplied `surf` dag joined
+// via !con with $x and $y (Int32Regs) and four data operands $r, $g, $b, $a
+// of class `intype`. `surf` must name its operand $s, because the assembly
+// string refers to $s. The pattern list is empty.
+class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
+}
+// Instantiations of SUST_2D_V4; each defm yields <name>_R and <name>_I. The
+// b8 and b16 forms both use Int16Regs for the data operands. Only b8/b16/b32
+// are instantiated here (no b64 v4 form), and sust.p appears only with .trap.
+defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
+defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
+defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
+defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
+defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
+defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
+defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
+
+// Base record shared by the single-value sust.*.a2d.* instantiations of
+// SUST_2D_ARRAY. It has no outputs; its inputs are the caller-supplied
+// `surf` dag joined via !con with $idx, $x and $y (Int32Regs) and one data
+// operand $r of class `intype`. `surf` must name its operand $s, because the
+// assembly string refers to $s. The pattern list is empty.
+// NOTE(review): $y is printed twice in the coordinate list, presumably to
+// pad it to four elements -- confirm against the PTX ISA description of sust.
+class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ intype:$r)),
+ inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
+}
-// Formatted
+// Instantiations of SUST_2D_ARRAY; each defm yields <name>_R and <name>_I.
+// The b8 and b16 forms both use Int16Regs for the data operand. sust.b is
+// instantiated for .clamp, .trap and .zero with b8/b16/b32/b64; sust.p is
+// instantiated only for .trap with b8/b16/b32.
+defm SUST_B_2D_ARRAY_B8_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_B8_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_B8_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_ARRAY_B8_TRAP
+ : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_B16_TRAP
+ : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_B32_TRAP
+ : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
+
+// Base record shared by the sust.*.a2d.v2.* instantiations of
+// SUST_2D_ARRAY_V2. It has no outputs; its inputs are the caller-supplied
+// `surf` dag joined via !con with $idx, $x and $y (Int32Regs) and two data
+// operands $r and $g of class `intype`. `surf` must name its operand $s,
+// because the assembly string refers to $s. The pattern list is empty.
+// NOTE(review): $y is printed twice in the coordinate list, presumably to
+// pad it to four elements -- confirm against the PTX ISA description of sust.
+class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_P_1D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
+// Instantiations of SUST_2D_ARRAY_V2; each defm yields <name>_R and
+// <name>_I. The b8 and b16 forms both use Int16Regs for the data operands.
+// sust.b is instantiated for .clamp, .trap and .zero with b8/b16/b32/b64;
+// sust.p is instantiated only for .trap with b8/b16/b32.
+defm SUST_B_2D_ARRAY_V2B8_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_V2B8_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_V2B8_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_ARRAY_V2B8_TRAP
+ : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V2B16_TRAP
+ : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V2B32_TRAP
+ : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
+
+// Base record shared by the sust.*.a2d.v4.* instantiations of
+// SUST_2D_ARRAY_V4. It has no outputs; its inputs are the caller-supplied
+// `surf` dag joined via !con with $idx, $x and $y (Int32Regs) and four data
+// operands $r, $g, $b, $a of class `intype`. `surf` must name its operand
+// $s, because the assembly string refers to $s. The pattern list is empty.
+// NOTE(review): $y is printed twice in the coordinate list, presumably to
+// pad it to four elements -- confirm against the PTX ISA description of sust.
+class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
+}
+// Instantiations of SUST_2D_ARRAY_V4; each defm yields <name>_R and
+// <name>_I. The b8 and b16 forms both use Int16Regs for the data operands.
+// Only b8/b16/b32 are instantiated here (no b64 v4 form), and sust.p appears
+// only with .trap.
+defm SUST_B_2D_ARRAY_V4B8_CLAMP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_CLAMP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_CLAMP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_2D_ARRAY_V4B8_TRAP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_TRAP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_TRAP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_2D_ARRAY_V4B8_ZERO
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_ZERO
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_ZERO
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_2D_ARRAY_V4B8_TRAP
+ : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V4B16_TRAP
+ : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V4B32_TRAP
+ : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
+
+// Base record shared by the single-value sust.*.3d.* instantiations of
+// SUST_3D. It has no outputs; its inputs are the caller-supplied `surf` dag
+// joined via !con with $x, $y and $z (Int32Regs) and one data operand $r of
+// class `intype`. `surf` must name its operand $s, because the assembly
+// string refers to $s. The pattern list is empty.
+// NOTE(review): $z is printed twice in the coordinate list, presumably to
+// pad it to four elements -- confirm against the PTX ISA description of sust.
+class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ intype:$r)),
+ inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_3D<string inst, NVPTXRegClass intype> {
+ def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_P_1D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+// Instantiations of SUST_3D; each defm yields <name>_R and <name>_I. The b8
+// and b16 forms both use Int16Regs for the data operand. sust.b is
+// instantiated for .clamp, .trap and .zero with b8/b16/b32/b64; sust.p is
+// instantiated only for .trap with b8/b16/b32.
+defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
+defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
+defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
+defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
+
+defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
+defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
+defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
+defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
+
+defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
+defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
+defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
+defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
+
+defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
+defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
+defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
+
+// Base record shared by the sust.*.3d.v2.* instantiations of SUST_3D_V2.
+// It has no outputs; its inputs are the caller-supplied `surf` dag joined
+// via !con with $x, $y and $z (Int32Regs) and two data operands $r and $g of
+// class `intype`. `surf` must name its operand $s, because the assembly
+// string refers to $s. The pattern list is empty.
+// NOTE(review): $z is printed twice in the coordinate list, presumably to
+// pad it to four elements -- confirm against the PTX ISA description of sust.
+class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
+}
+// Instantiations of SUST_3D_V2; each defm yields <name>_R and <name>_I. The
+// b8 and b16 forms both use Int16Regs for the data operands. sust.b is
+// instantiated for .clamp, .trap and .zero with b8/b16/b32/b64; sust.p is
+// instantiated only for .trap with b8/b16/b32.
+defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
+defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
+defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
+defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
+defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
+defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
+defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
+defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
+defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
+
+// Base record shared by the sust.*.3d.v4.* instantiations of SUST_3D_V4.
+// It has no outputs; its inputs are the caller-supplied `surf` dag joined
+// via !con with $x, $y and $z (Int32Regs) and four data operands $r, $g, $b,
+// $a of class `intype`. `surf` must name its operand $s, because the
+// assembly string refers to $s. The pattern list is empty.
+// NOTE(review): $z is printed twice in the coordinate list, presumably to
+// pad it to four elements -- confirm against the PTX ISA description of sust.
+class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+// Defines two records per instantiation: _R takes $s as Int64Regs and _I
+// takes $s as i64imm. Every other operand and the assembly text are shared.
+multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_P_2D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_2D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_2D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_2D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+// sust.b .clamp and .trap instantiations of SUST_3D_V4; each defm yields
+// <name>_R and <name>_I. The b8 and b16 forms both use Int16Regs for the
+// data operands.
+defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
+defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
+defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
+defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
-def SUST_P_2D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_2D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_2D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_2D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+// sust.b .zero and sust.p .trap instantiations of SUST_3D_V4; each defm
+// yields <name>_R and <name>_I. The b8 and b16 forms both use Int16Regs for
+// the data operands.
+defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
+defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
+defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
+defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
+defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
+defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
-def SUST_P_3D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_P_3D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_P_3D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_P_3D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_3D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_3D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_3D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_3D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_3D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
}
// Surface store instruction patterns
@@ -6217,248 +4725,248 @@ def SUST_P_3D_V4B32_TRAP
// .clamp variant
def : Pat<(int_nvvm_sust_b_1d_i8_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_clamp
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+ (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
- (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6467,77 +4975,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
def : Pat<(int_nvvm_sust_b_3d_i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B8_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B16_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_B_3D_B32_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r),
- (SUST_B_3D_B64_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g),
- (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6545,248 +5053,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
// .trap variant
def : Pat<(int_nvvm_sust_b_1d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_trap
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+ (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
- (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6795,77 +5303,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
def : Pat<(int_nvvm_sust_b_3d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B8_TRAP Int64Regs:$s,
+ (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B16_TRAP Int64Regs:$s,
+ (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_B_3D_B32_TRAP Int64Regs:$s,
+ (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r),
- (SUST_B_3D_B64_TRAP Int64Regs:$s,
+ (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g),
- (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
+ (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
+ (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
+ (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6873,248 +5381,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
// .zero variant
def : Pat<(int_nvvm_sust_b_1d_i8_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_zero
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+ (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
- (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7123,77 +5631,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
def : Pat<(int_nvvm_sust_b_3d_i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B8_ZERO Int64Regs:$s,
+ (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B16_ZERO Int64Regs:$s,
+ (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_B_3D_B32_ZERO Int64Regs:$s,
+ (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r),
- (SUST_B_3D_B64_ZERO Int64Regs:$s,
+ (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g),
- (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
+ (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
+ (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
+ (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7202,207 +5710,207 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
def : Pat<(int_nvvm_sust_p_1d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7411,63 +5919,63 @@ def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
def : Pat<(int_nvvm_sust_p_3d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_P_3D_B8_TRAP Int64Regs:$s,
+ (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_3d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_P_3D_B16_TRAP Int64Regs:$s,
+ (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_3d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_P_3D_B32_TRAP Int64Regs:$s,
+ (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
+ (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
+ (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
+ (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
+ (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
+ (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
+ (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7578,6 +6086,7 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
!eq(ptx_elt_type, "bf16") : Int32Regs,
!eq(ptx_elt_type, "tf32") : Int32Regs,
!eq(ptx_elt_type, "s32") : Int32Regs,
+ !eq(ptx_elt_type, "b16") : Int32Regs,
!eq(ptx_elt_type, "s8") : Int32Regs,
!eq(ptx_elt_type, "u8") : Int32Regs,
!eq(ptx_elt_type, "s4") : Int32Regs,
@@ -7661,7 +6170,11 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
!eq(geom, "m16n8k64"),
!eq(geom, "m8n8k128"),
!eq(geom, "m16n8k128"),
- !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70]);
+ !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],
+
+ !and(!eq(op,"ldmatrix"),
+ !eq(ptx_elt_type,"b16"),
+ !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);
// template DAGs for instruction inputs/output.
dag Outs = !dag(outs, ptx_regs, reg_names);
@@ -7910,6 +6423,44 @@ defset list<WMMA_INSTR> MMAs = {
} // layout_a
} // defset
+//
+// ldmatrix.sync.aligned.m8n8.<frag>[|.trans][|.shared].b16
+//
+class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
+ DAGOperand SrcOp>
+ : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
+ Requires<Frag.Predicates> {
+ // Build a PatFrag that only matches a particular address space.
+ PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
+ !cond(!eq(Space, ".shared"): AS_match.shared,
+ true: AS_match.generic)>;
+ // Build AS-constrained pattern.
+ let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
+
+ let OutOperandList = Frag.Outs;
+ let InOperandList = !con(Args, (ins MmaCode:$ptx));
+ let AsmString = "ldmatrix.sync.aligned."
+ # Frag.geom
+ # "." # Frag.frag
+ # !if(Transposed, ".trans", "")
+ # Space
+ # "." # Frag.ptx_elt_type
+ # " " # Frag.regstring # ", [$src];";
+}
+
+// Create all ldmatrix variants
+defset list<WMMA_INSTR> LDMATRIXs = {
+ foreach transposed = [false, true] in {
+ foreach space = [".shared", ""] in {
+ foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
+ foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
+ if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
+ def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
+ addr>;
+ } // addr
+ } // space
+ } // transposed
+} // defset
// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
@@ -7921,5 +6472,5 @@ class MMA_PAT<WMMA_INSTR wi>
Requires<wi.Predicates>;
// Build intrinsic->instruction patterns for all MMA instructions.
-foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs) in
+foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
def : MMA_PAT<mma>;