summaryrefslogtreecommitdiff
path: root/include/llvm/IR/IntrinsicsAMDGPU.td
diff options
context:
space:
mode:
Diffstat (limited to 'include/llvm/IR/IntrinsicsAMDGPU.td')
-rw-r--r--include/llvm/IR/IntrinsicsAMDGPU.td109
1 files changed, 100 insertions, 9 deletions
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index dcec3860f2ca7..5415c6b0d1518 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -64,6 +64,10 @@ def int_r600_recipsqrt_clamped : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;
+def int_r600_cube : Intrinsic<
+ [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]
+>;
+
} // End TargetPrefix = "r600"
let TargetPrefix = "amdgcn" in {
@@ -121,7 +125,8 @@ def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
-def int_amdgcn_s_waitcnt : Intrinsic<[], [llvm_i32_ty], []>;
+def int_amdgcn_s_waitcnt : GCCBuiltin<"__builtin_amdgcn_s_waitcnt">,
+ Intrinsic<[], [llvm_i32_ty], []>;
def int_amdgcn_div_scale : Intrinsic<
// 1st parameter: Numerator
@@ -202,10 +207,19 @@ def int_amdgcn_fract : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;
+def int_amdgcn_cvt_pkrtz : Intrinsic<
+ [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+>;
+
def int_amdgcn_class : Intrinsic<
[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]
>;
+def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">,
+ Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]
+>;
+
def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">,
Intrinsic<[llvm_float_ty],
[llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
@@ -231,17 +245,20 @@ def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
def int_amdgcn_sffbh :
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-// TODO: Do we want an ordering for these?
-def int_amdgcn_atomic_inc : Intrinsic<[llvm_anyint_ty],
- [llvm_anyptr_ty, LLVMMatchType<0>],
- [IntrArgMemOnly, NoCapture<0>]
->;
-def int_amdgcn_atomic_dec : Intrinsic<[llvm_anyint_ty],
- [llvm_anyptr_ty, LLVMMatchType<0>],
+// Fields should mirror atomicrmw
+class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyptr_ty,
+ LLVMMatchType<0>,
+ llvm_i32_ty, // ordering
+ llvm_i32_ty, // scope
+ llvm_i1_ty], // isVolatile
[IntrArgMemOnly, NoCapture<0>]
>;
+def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
+def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
+
class AMDGPUImageLoad : Intrinsic <
[llvm_anyfloat_ty], // vdata(VGPR)
[llvm_anyint_ty, // vaddr(VGPR)
@@ -451,6 +468,32 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
llvm_i1_ty], // slc(imm)
[]>;
+// Uses that do not set the done bit should set IntrWriteMem on the
+// call site.
+def int_amdgcn_exp : Intrinsic <[], [
+ llvm_i32_ty, // tgt,
+ llvm_i32_ty, // en
+ llvm_any_ty, // src0 (f32 or i32)
+ LLVMMatchType<0>, // src1
+ LLVMMatchType<0>, // src2
+ LLVMMatchType<0>, // src3
+ llvm_i1_ty, // done
+ llvm_i1_ty // vm
+ ],
+ []
+>;
+
+// exp with compr bit set.
+def int_amdgcn_exp_compr : Intrinsic <[], [
+ llvm_i32_ty, // tgt,
+ llvm_i32_ty, // en
+ llvm_anyvector_ty, // src0 (v2f16 or v2i16)
+ LLVMMatchType<0>, // src1
+ llvm_i1_ty, // done
+ llvm_i1_ty], // vm
+ []
+>;
+
def int_amdgcn_buffer_wbinvl1_sc :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
Intrinsic<[], [], []>;
@@ -530,7 +573,14 @@ def int_amdgcn_ds_swizzle :
GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
-// llvm.amdgcn.lerp
+def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
+>;
+
+def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]
+>;
+
def int_amdgcn_lerp :
GCCBuiltin<"__builtin_amdgcn_lerp">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -619,10 +669,51 @@ def int_amdgcn_s_memrealtime :
// llvm.amdgcn.ds.permute <index> <src>
def int_amdgcn_ds_permute :
+ GCCBuiltin<"__builtin_amdgcn_ds_permute">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
// llvm.amdgcn.ds.bpermute <index> <src>
def int_amdgcn_ds_bpermute :
+ GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
+//===----------------------------------------------------------------------===//
+// Special Intrinsics for backend internal use only. No frontend
+// should emit calls to these.
+// ===----------------------------------------------------------------------===//
+def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
+ [llvm_i1_ty], [IntrConvergent]
+>;
+
+def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
+ [llvm_i64_ty], [IntrConvergent]
+>;
+
+def int_amdgcn_break : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty], [IntrNoMem, IntrConvergent]
+>;
+
+def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
+ [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
+>;
+
+def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
+>;
+
+def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
+ [llvm_i64_ty], [IntrConvergent]
+>;
+
+def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
+
+// Represent unreachable in a divergent region.
+def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;
+
+// Emit 2.5 ulp, no denormal division. Should only be inserted by
+// pass based on !fpmath metadata.
+def int_amdgcn_fdiv_fast : Intrinsic<
+ [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+>;
}