diff options
Diffstat (limited to 'test/CodeGen/builtins-nvptx.c')
-rw-r--r-- | test/CodeGen/builtins-nvptx.c | 252 |
1 files changed, 180 insertions, 72 deletions
diff --git a/test/CodeGen/builtins-nvptx.c b/test/CodeGen/builtins-nvptx.c index 745e74f0ca64a..cd21361140bcd 100644 --- a/test/CodeGen/builtins-nvptx.c +++ b/test/CodeGen/builtins-nvptx.c @@ -1,6 +1,8 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s -// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | \ +// RUN: FileCheck -check-prefix=CHECK -check-prefix=LP32 %s +// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | \ +// RUN: FileCheck -check-prefix=CHECK -check-prefix=LP64 %s #define __device__ __attribute__((device)) #define __global__ __attribute__((global)) @@ -9,15 +11,15 @@ __device__ int read_tid() { -// CHECK: call i32 @llvm.ptx.read.tid.x() -// CHECK: call i32 @llvm.ptx.read.tid.y() -// CHECK: call i32 @llvm.ptx.read.tid.z() -// CHECK: call i32 @llvm.ptx.read.tid.w() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.w() - int x = __builtin_ptx_read_tid_x(); - int y = __builtin_ptx_read_tid_y(); - int z = __builtin_ptx_read_tid_z(); - int w = __builtin_ptx_read_tid_w(); + int x = __nvvm_read_ptx_sreg_tid_x(); + int y = __nvvm_read_ptx_sreg_tid_y(); + int z = __nvvm_read_ptx_sreg_tid_z(); + int w = __nvvm_read_ptx_sreg_tid_w(); return x + y + z + w; @@ -25,15 +27,15 @@ __device__ int read_tid() { __device__ int read_ntid() { -// CHECK: call i32 @llvm.ptx.read.ntid.x() -// CHECK: call i32 @llvm.ptx.read.ntid.y() -// CHECK: call i32 @llvm.ptx.read.ntid.z() -// CHECK: call i32 @llvm.ptx.read.ntid.w() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.w() - int x = __builtin_ptx_read_ntid_x(); - int y = __builtin_ptx_read_ntid_y(); - int z = __builtin_ptx_read_ntid_z(); - int w = __builtin_ptx_read_ntid_w(); + int x = __nvvm_read_ptx_sreg_ntid_x(); + int y = __nvvm_read_ptx_sreg_ntid_y(); + int z = __nvvm_read_ptx_sreg_ntid_z(); + int w = __nvvm_read_ptx_sreg_ntid_w(); return x + y + z + w; @@ -41,15 +43,15 @@ __device__ int read_ntid() { __device__ int read_ctaid() { -// CHECK: call i32 @llvm.ptx.read.ctaid.x() -// CHECK: call i32 @llvm.ptx.read.ctaid.y() -// CHECK: call i32 @llvm.ptx.read.ctaid.z() -// CHECK: call i32 @llvm.ptx.read.ctaid.w() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w() - int x = __builtin_ptx_read_ctaid_x(); - int y = __builtin_ptx_read_ctaid_y(); - int z = __builtin_ptx_read_ctaid_z(); - int w = __builtin_ptx_read_ctaid_w(); + int x = __nvvm_read_ptx_sreg_ctaid_x(); + int y = __nvvm_read_ptx_sreg_ctaid_y(); + int z = __nvvm_read_ptx_sreg_ctaid_z(); + int w = __nvvm_read_ptx_sreg_ctaid_w(); return x + y + z + w; @@ -57,15 +59,15 @@ __device__ int read_ctaid() { __device__ int read_nctaid() { -// CHECK: call i32 @llvm.ptx.read.nctaid.x() -// CHECK: call i32 @llvm.ptx.read.nctaid.y() -// CHECK: call i32 @llvm.ptx.read.nctaid.z() -// CHECK: call i32 @llvm.ptx.read.nctaid.w() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w() - int x = __builtin_ptx_read_nctaid_x(); - int y = __builtin_ptx_read_nctaid_y(); - int z = __builtin_ptx_read_nctaid_z(); - int w = __builtin_ptx_read_nctaid_w(); + int x = __nvvm_read_ptx_sreg_nctaid_x(); + int y = __nvvm_read_ptx_sreg_nctaid_y(); + int z = __nvvm_read_ptx_sreg_nctaid_z(); + int w = __nvvm_read_ptx_sreg_nctaid_w(); return x + y + z + w; @@ -73,19 +75,19 @@ __device__ int read_nctaid() { __device__ int read_ids() { -// CHECK: call i32 @llvm.ptx.read.laneid() -// CHECK: call i32 @llvm.ptx.read.warpid() -// CHECK: call i32 @llvm.ptx.read.nwarpid() -// CHECK: call i32 @llvm.ptx.read.smid() -// CHECK: call i32 @llvm.ptx.read.nsmid() -// CHECK: call i32 @llvm.ptx.read.gridid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid() - int a = __builtin_ptx_read_laneid(); - int b = __builtin_ptx_read_warpid(); - int c = __builtin_ptx_read_nwarpid(); - int d = __builtin_ptx_read_smid(); - int e = __builtin_ptx_read_nsmid(); - int f = __builtin_ptx_read_gridid(); + int a = __nvvm_read_ptx_sreg_laneid(); + int b = __nvvm_read_ptx_sreg_warpid(); + int c = __nvvm_read_ptx_sreg_nwarpid(); + int d = __nvvm_read_ptx_sreg_smid(); + int e = __nvvm_read_ptx_sreg_nsmid(); + int f = __nvvm_read_ptx_sreg_gridid(); return a + b + c + d + e + f; @@ -93,17 +95,17 @@ __device__ int read_ids() { __device__ int read_lanemasks() { -// CHECK: call i32 @llvm.ptx.read.lanemask.eq() -// CHECK: call i32 @llvm.ptx.read.lanemask.le() -// CHECK: call i32 @llvm.ptx.read.lanemask.lt() -// CHECK: call i32 @llvm.ptx.read.lanemask.ge() -// CHECK: call i32 @llvm.ptx.read.lanemask.gt() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt() - int a = __builtin_ptx_read_lanemask_eq(); - int b = __builtin_ptx_read_lanemask_le(); - int c = __builtin_ptx_read_lanemask_lt(); - int d = __builtin_ptx_read_lanemask_ge(); - int e = __builtin_ptx_read_lanemask_gt(); + int a = __nvvm_read_ptx_sreg_lanemask_eq(); + int b = __nvvm_read_ptx_sreg_lanemask_le(); + int c = __nvvm_read_ptx_sreg_lanemask_lt(); + int d = __nvvm_read_ptx_sreg_lanemask_ge(); + int e = __nvvm_read_ptx_sreg_lanemask_gt(); return a + b + c + d + e; @@ -111,26 +113,26 @@ __device__ int read_lanemasks() { __device__ long long read_clocks() { -// CHECK: call i32 @llvm.ptx.read.clock() -// CHECK: call i64 @llvm.ptx.read.clock64() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock() +// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64() - int a = __builtin_ptx_read_clock(); - long long b = __builtin_ptx_read_clock64(); + int a = __nvvm_read_ptx_sreg_clock(); + long long b = __nvvm_read_ptx_sreg_clock64(); return a + b; } __device__ int read_pms() { -// CHECK: call i32 @llvm.ptx.read.pm0() -// CHECK: call i32 @llvm.ptx.read.pm1() -// CHECK: call i32 @llvm.ptx.read.pm2() -// CHECK: call i32 @llvm.ptx.read.pm3() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm0() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm1() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm2() +// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm3() - int a = __builtin_ptx_read_pm0(); - int b = __builtin_ptx_read_pm1(); - int c = __builtin_ptx_read_pm2(); - int d = __builtin_ptx_read_pm3(); + int a = __nvvm_read_ptx_sreg_pm0(); + int b = __nvvm_read_ptx_sreg_pm1(); + int c = __nvvm_read_ptx_sreg_pm2(); + int d = __nvvm_read_ptx_sreg_pm3(); return a + b + c + d; @@ -138,9 +140,9 @@ __device__ int read_pms() { __device__ void sync() { -// CHECK: call void @llvm.ptx.bar.sync(i32 0) +// CHECK: call void @llvm.nvvm.bar.sync(i32 0) - __builtin_ptx_bar_sync(0); + __nvvm_bar_sync(0); } @@ -177,7 +179,7 @@ __device__ void nvvm_math(float f1, float f2, double d1, double d2) { // CHECK: call void @llvm.nvvm.membar.sys() __nvvm_membar_sys(); // CHECK: call void @llvm.nvvm.barrier0() - __nvvm_bar0(); + __syncthreads(); } __device__ int di; @@ -189,7 +191,7 @@ __shared__ long long sll; // Check for atomic intrinsics // CHECK-LABEL: nvvm_atom -__device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l, +__device__ void nvvm_atom(float *fp, float f, int *ip, int i, unsigned int *uip, unsigned ui, long *lp, long l, long long *llp, long long ll) { // CHECK: atomicrmw add __nvvm_atom_add_gen_i(ip, i); @@ -272,5 +274,111 @@ __device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l, // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 __nvvm_atom_add_gen_f(fp, f); + // CHECK: call i32 @llvm.nvvm.atomic.load.inc.32.p0i32 + __nvvm_atom_inc_gen_ui(uip, ui); + + // CHECK: call i32 @llvm.nvvm.atomic.load.dec.32.p0i32 + __nvvm_atom_dec_gen_ui(uip, ui); + // CHECK: ret } + +// CHECK-LABEL: nvvm_ldg +__device__ void nvvm_ldg(const void *p) { + // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1) + // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1) + __nvvm_ldg_c((const char *)p); + __nvvm_ldg_uc((const unsigned char *)p); + + // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2) + // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2) + __nvvm_ldg_s((const short *)p); + __nvvm_ldg_us((const unsigned short *)p); + + // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4) + // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4) + __nvvm_ldg_i((const int *)p); + __nvvm_ldg_ui((const unsigned int *)p); + + // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4) + // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4) + // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8) + // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8) + __nvvm_ldg_l((const long *)p); + __nvvm_ldg_ul((const unsigned long *)p); + + // CHECK: call float @llvm.nvvm.ldg.global.f.f32.p0f32(float* {{%[0-9]+}}, i32 4) + __nvvm_ldg_f((const float *)p); + // CHECK: call double @llvm.nvvm.ldg.global.f.f64.p0f64(double* {{%[0-9]+}}, i32 8) + __nvvm_ldg_d((const double *)p); + + // In practice, the pointers we pass to __ldg will be aligned as appropriate + // for the CUDA <type>N vector types (e.g. short4), which are not the same as + // the LLVM vector types. However, each LLVM vector type has an alignment + // less than or equal to its corresponding CUDA type, so we're OK. + // + // PTX Interoperability section 2.2: "For a vector with an even number of + // elements, its alignment is set to number of elements times the alignment of + // its member: n*alignof(t)." + + // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2) + // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2) + typedef char char2 __attribute__((ext_vector_type(2))); + typedef unsigned char uchar2 __attribute__((ext_vector_type(2))); + __nvvm_ldg_c2((const char2 *)p); + __nvvm_ldg_uc2((const uchar2 *)p); + + // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4) + // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4) + typedef char char4 __attribute__((ext_vector_type(4))); + typedef unsigned char uchar4 __attribute__((ext_vector_type(4))); + __nvvm_ldg_c4((const char4 *)p); + __nvvm_ldg_uc4((const uchar4 *)p); + + // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4) + // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4) + typedef short short2 __attribute__((ext_vector_type(2))); + typedef unsigned short ushort2 __attribute__((ext_vector_type(2))); + __nvvm_ldg_s2((const short2 *)p); + __nvvm_ldg_us2((const ushort2 *)p); + + // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8) + // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8) + typedef short short4 __attribute__((ext_vector_type(4))); + typedef unsigned short ushort4 __attribute__((ext_vector_type(4))); + __nvvm_ldg_s4((const short4 *)p); + __nvvm_ldg_us4((const ushort4 *)p); + + // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8) + // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8) + typedef int int2 __attribute__((ext_vector_type(2))); + typedef unsigned int uint2 __attribute__((ext_vector_type(2))); + __nvvm_ldg_i2((const int2 *)p); + __nvvm_ldg_ui2((const uint2 *)p); + + // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16) + // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16) + typedef int int4 __attribute__((ext_vector_type(4))); + typedef unsigned int uint4 __attribute__((ext_vector_type(4))); + __nvvm_ldg_i4((const int4 *)p); + __nvvm_ldg_ui4((const uint4 *)p); + + // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16) + // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16) + typedef long long longlong2 __attribute__((ext_vector_type(2))); + typedef unsigned long long ulonglong2 __attribute__((ext_vector_type(2))); + __nvvm_ldg_ll2((const longlong2 *)p); + __nvvm_ldg_ull2((const ulonglong2 *)p); + + // CHECK: call <2 x float> @llvm.nvvm.ldg.global.f.v2f32.p0v2f32(<2 x float>* {{%[0-9]+}}, i32 8) + typedef float float2 __attribute__((ext_vector_type(2))); + __nvvm_ldg_f2((const float2 *)p); + + // CHECK: call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0v4f32(<4 x float>* {{%[0-9]+}}, i32 16) + typedef float float4 __attribute__((ext_vector_type(4))); + __nvvm_ldg_f4((const float4 *)p); + + // CHECK: call <2 x double> @llvm.nvvm.ldg.global.f.v2f64.p0v2f64(<2 x double>* {{%[0-9]+}}, i32 16) + typedef double double2 __attribute__((ext_vector_type(2))); + __nvvm_ldg_d2((const double2 *)p); +} |