summaryrefslogtreecommitdiff
path: root/test/CodeGen/builtins-nvptx.c
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/builtins-nvptx.c')
-rw-r--r--test/CodeGen/builtins-nvptx.c252
1 files changed, 180 insertions, 72 deletions
diff --git a/test/CodeGen/builtins-nvptx.c b/test/CodeGen/builtins-nvptx.c
index 745e74f0ca64a..cd21361140bcd 100644
--- a/test/CodeGen/builtins-nvptx.c
+++ b/test/CodeGen/builtins-nvptx.c
@@ -1,6 +1,8 @@
// REQUIRES: nvptx-registered-target
-// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
-// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | \
+// RUN: FileCheck -check-prefix=CHECK -check-prefix=LP32 %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | \
+// RUN: FileCheck -check-prefix=CHECK -check-prefix=LP64 %s
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
@@ -9,15 +11,15 @@
__device__ int read_tid() {
-// CHECK: call i32 @llvm.ptx.read.tid.x()
-// CHECK: call i32 @llvm.ptx.read.tid.y()
-// CHECK: call i32 @llvm.ptx.read.tid.z()
-// CHECK: call i32 @llvm.ptx.read.tid.w()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.w()
- int x = __builtin_ptx_read_tid_x();
- int y = __builtin_ptx_read_tid_y();
- int z = __builtin_ptx_read_tid_z();
- int w = __builtin_ptx_read_tid_w();
+ int x = __nvvm_read_ptx_sreg_tid_x();
+ int y = __nvvm_read_ptx_sreg_tid_y();
+ int z = __nvvm_read_ptx_sreg_tid_z();
+ int w = __nvvm_read_ptx_sreg_tid_w();
return x + y + z + w;
@@ -25,15 +27,15 @@ __device__ int read_tid() {
__device__ int read_ntid() {
-// CHECK: call i32 @llvm.ptx.read.ntid.x()
-// CHECK: call i32 @llvm.ptx.read.ntid.y()
-// CHECK: call i32 @llvm.ptx.read.ntid.z()
-// CHECK: call i32 @llvm.ptx.read.ntid.w()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()
- int x = __builtin_ptx_read_ntid_x();
- int y = __builtin_ptx_read_ntid_y();
- int z = __builtin_ptx_read_ntid_z();
- int w = __builtin_ptx_read_ntid_w();
+ int x = __nvvm_read_ptx_sreg_ntid_x();
+ int y = __nvvm_read_ptx_sreg_ntid_y();
+ int z = __nvvm_read_ptx_sreg_ntid_z();
+ int w = __nvvm_read_ptx_sreg_ntid_w();
return x + y + z + w;
@@ -41,15 +43,15 @@ __device__ int read_ntid() {
__device__ int read_ctaid() {
-// CHECK: call i32 @llvm.ptx.read.ctaid.x()
-// CHECK: call i32 @llvm.ptx.read.ctaid.y()
-// CHECK: call i32 @llvm.ptx.read.ctaid.z()
-// CHECK: call i32 @llvm.ptx.read.ctaid.w()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()
- int x = __builtin_ptx_read_ctaid_x();
- int y = __builtin_ptx_read_ctaid_y();
- int z = __builtin_ptx_read_ctaid_z();
- int w = __builtin_ptx_read_ctaid_w();
+ int x = __nvvm_read_ptx_sreg_ctaid_x();
+ int y = __nvvm_read_ptx_sreg_ctaid_y();
+ int z = __nvvm_read_ptx_sreg_ctaid_z();
+ int w = __nvvm_read_ptx_sreg_ctaid_w();
return x + y + z + w;
@@ -57,15 +59,15 @@ __device__ int read_ctaid() {
__device__ int read_nctaid() {
-// CHECK: call i32 @llvm.ptx.read.nctaid.x()
-// CHECK: call i32 @llvm.ptx.read.nctaid.y()
-// CHECK: call i32 @llvm.ptx.read.nctaid.z()
-// CHECK: call i32 @llvm.ptx.read.nctaid.w()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()
- int x = __builtin_ptx_read_nctaid_x();
- int y = __builtin_ptx_read_nctaid_y();
- int z = __builtin_ptx_read_nctaid_z();
- int w = __builtin_ptx_read_nctaid_w();
+ int x = __nvvm_read_ptx_sreg_nctaid_x();
+ int y = __nvvm_read_ptx_sreg_nctaid_y();
+ int z = __nvvm_read_ptx_sreg_nctaid_z();
+ int w = __nvvm_read_ptx_sreg_nctaid_w();
return x + y + z + w;
@@ -73,19 +75,19 @@ __device__ int read_nctaid() {
__device__ int read_ids() {
-// CHECK: call i32 @llvm.ptx.read.laneid()
-// CHECK: call i32 @llvm.ptx.read.warpid()
-// CHECK: call i32 @llvm.ptx.read.nwarpid()
-// CHECK: call i32 @llvm.ptx.read.smid()
-// CHECK: call i32 @llvm.ptx.read.nsmid()
-// CHECK: call i32 @llvm.ptx.read.gridid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid()
- int a = __builtin_ptx_read_laneid();
- int b = __builtin_ptx_read_warpid();
- int c = __builtin_ptx_read_nwarpid();
- int d = __builtin_ptx_read_smid();
- int e = __builtin_ptx_read_nsmid();
- int f = __builtin_ptx_read_gridid();
+ int a = __nvvm_read_ptx_sreg_laneid();
+ int b = __nvvm_read_ptx_sreg_warpid();
+ int c = __nvvm_read_ptx_sreg_nwarpid();
+ int d = __nvvm_read_ptx_sreg_smid();
+ int e = __nvvm_read_ptx_sreg_nsmid();
+ int f = __nvvm_read_ptx_sreg_gridid();
return a + b + c + d + e + f;
@@ -93,17 +95,17 @@ __device__ int read_ids() {
__device__ int read_lanemasks() {
-// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
-// CHECK: call i32 @llvm.ptx.read.lanemask.le()
-// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
-// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
-// CHECK: call i32 @llvm.ptx.read.lanemask.gt()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()
- int a = __builtin_ptx_read_lanemask_eq();
- int b = __builtin_ptx_read_lanemask_le();
- int c = __builtin_ptx_read_lanemask_lt();
- int d = __builtin_ptx_read_lanemask_ge();
- int e = __builtin_ptx_read_lanemask_gt();
+ int a = __nvvm_read_ptx_sreg_lanemask_eq();
+ int b = __nvvm_read_ptx_sreg_lanemask_le();
+ int c = __nvvm_read_ptx_sreg_lanemask_lt();
+ int d = __nvvm_read_ptx_sreg_lanemask_ge();
+ int e = __nvvm_read_ptx_sreg_lanemask_gt();
return a + b + c + d + e;
@@ -111,26 +113,26 @@ __device__ int read_lanemasks() {
__device__ long long read_clocks() {
-// CHECK: call i32 @llvm.ptx.read.clock()
-// CHECK: call i64 @llvm.ptx.read.clock64()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
+// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()
- int a = __builtin_ptx_read_clock();
- long long b = __builtin_ptx_read_clock64();
+ int a = __nvvm_read_ptx_sreg_clock();
+ long long b = __nvvm_read_ptx_sreg_clock64();
return a + b;
}
__device__ int read_pms() {
-// CHECK: call i32 @llvm.ptx.read.pm0()
-// CHECK: call i32 @llvm.ptx.read.pm1()
-// CHECK: call i32 @llvm.ptx.read.pm2()
-// CHECK: call i32 @llvm.ptx.read.pm3()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm0()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm1()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm2()
+// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm3()
- int a = __builtin_ptx_read_pm0();
- int b = __builtin_ptx_read_pm1();
- int c = __builtin_ptx_read_pm2();
- int d = __builtin_ptx_read_pm3();
+ int a = __nvvm_read_ptx_sreg_pm0();
+ int b = __nvvm_read_ptx_sreg_pm1();
+ int c = __nvvm_read_ptx_sreg_pm2();
+ int d = __nvvm_read_ptx_sreg_pm3();
return a + b + c + d;
@@ -138,9 +140,9 @@ __device__ int read_pms() {
__device__ void sync() {
-// CHECK: call void @llvm.ptx.bar.sync(i32 0)
+// CHECK: call void @llvm.nvvm.bar.sync(i32 0)
- __builtin_ptx_bar_sync(0);
+ __nvvm_bar_sync(0);
}
@@ -177,7 +179,7 @@ __device__ void nvvm_math(float f1, float f2, double d1, double d2) {
// CHECK: call void @llvm.nvvm.membar.sys()
__nvvm_membar_sys();
// CHECK: call void @llvm.nvvm.barrier0()
- __nvvm_bar0();
+ __syncthreads();
}
__device__ int di;
@@ -189,7 +191,7 @@ __shared__ long long sll;
// Check for atomic intrinsics
// CHECK-LABEL: nvvm_atom
-__device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l,
+__device__ void nvvm_atom(float *fp, float f, int *ip, int i, unsigned int *uip, unsigned ui, long *lp, long l,
long long *llp, long long ll) {
// CHECK: atomicrmw add
__nvvm_atom_add_gen_i(ip, i);
@@ -272,5 +274,111 @@ __device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l,
// CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
__nvvm_atom_add_gen_f(fp, f);
+ // CHECK: call i32 @llvm.nvvm.atomic.load.inc.32.p0i32
+ __nvvm_atom_inc_gen_ui(uip, ui);
+
+ // CHECK: call i32 @llvm.nvvm.atomic.load.dec.32.p0i32
+ __nvvm_atom_dec_gen_ui(uip, ui);
+
// CHECK: ret
}
+
+// CHECK-LABEL: nvvm_ldg
+__device__ void nvvm_ldg(const void *p) {
+ // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
+ // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
+ __nvvm_ldg_c((const char *)p);
+ __nvvm_ldg_uc((const unsigned char *)p);
+
+ // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
+ // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
+ __nvvm_ldg_s((const short *)p);
+ __nvvm_ldg_us((const unsigned short *)p);
+
+ // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
+ // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
+ __nvvm_ldg_i((const int *)p);
+ __nvvm_ldg_ui((const unsigned int *)p);
+
+ // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
+ // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
+ // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
+ // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
+ __nvvm_ldg_l((const long *)p);
+ __nvvm_ldg_ul((const unsigned long *)p);
+
+ // CHECK: call float @llvm.nvvm.ldg.global.f.f32.p0f32(float* {{%[0-9]+}}, i32 4)
+ __nvvm_ldg_f((const float *)p);
+ // CHECK: call double @llvm.nvvm.ldg.global.f.f64.p0f64(double* {{%[0-9]+}}, i32 8)
+ __nvvm_ldg_d((const double *)p);
+
+ // In practice, the pointers we pass to __ldg will be aligned as appropriate
+ // for the CUDA <type>N vector types (e.g. short4), which are not the same as
+ // the LLVM vector types. However, each LLVM vector type has an alignment
+ // less than or equal to its corresponding CUDA type, so we're OK.
+ //
+ // PTX Interoperability section 2.2: "For a vector with an even number of
+ // elements, its alignment is set to number of elements times the alignment of
+ // its member: n*alignof(t)."
+
+ // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
+ // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
+ typedef char char2 __attribute__((ext_vector_type(2)));
+ typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
+ __nvvm_ldg_c2((const char2 *)p);
+ __nvvm_ldg_uc2((const uchar2 *)p);
+
+ // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
+ // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
+ typedef char char4 __attribute__((ext_vector_type(4)));
+ typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
+ __nvvm_ldg_c4((const char4 *)p);
+ __nvvm_ldg_uc4((const uchar4 *)p);
+
+ // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
+ // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
+ typedef short short2 __attribute__((ext_vector_type(2)));
+ typedef unsigned short ushort2 __attribute__((ext_vector_type(2)));
+ __nvvm_ldg_s2((const short2 *)p);
+ __nvvm_ldg_us2((const ushort2 *)p);
+
+ // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
+ // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
+ typedef short short4 __attribute__((ext_vector_type(4)));
+ typedef unsigned short ushort4 __attribute__((ext_vector_type(4)));
+ __nvvm_ldg_s4((const short4 *)p);
+ __nvvm_ldg_us4((const ushort4 *)p);
+
+ // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
+ // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
+ typedef int int2 __attribute__((ext_vector_type(2)));
+ typedef unsigned int uint2 __attribute__((ext_vector_type(2)));
+ __nvvm_ldg_i2((const int2 *)p);
+ __nvvm_ldg_ui2((const uint2 *)p);
+
+ // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
+ // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
+ typedef int int4 __attribute__((ext_vector_type(4)));
+ typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
+ __nvvm_ldg_i4((const int4 *)p);
+ __nvvm_ldg_ui4((const uint4 *)p);
+
+ // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
+ // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
+ typedef long long longlong2 __attribute__((ext_vector_type(2)));
+ typedef unsigned long long ulonglong2 __attribute__((ext_vector_type(2)));
+ __nvvm_ldg_ll2((const longlong2 *)p);
+ __nvvm_ldg_ull2((const ulonglong2 *)p);
+
+ // CHECK: call <2 x float> @llvm.nvvm.ldg.global.f.v2f32.p0v2f32(<2 x float>* {{%[0-9]+}}, i32 8)
+ typedef float float2 __attribute__((ext_vector_type(2)));
+ __nvvm_ldg_f2((const float2 *)p);
+
+ // CHECK: call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0v4f32(<4 x float>* {{%[0-9]+}}, i32 16)
+ typedef float float4 __attribute__((ext_vector_type(4)));
+ __nvvm_ldg_f4((const float4 *)p);
+
+ // CHECK: call <2 x double> @llvm.nvvm.ldg.global.f.v2f64.p0v2f64(<2 x double>* {{%[0-9]+}}, i32 16)
+ typedef double double2 __attribute__((ext_vector_type(2)));
+ __nvvm_ldg_d2((const double2 *)p);
+}