diff options
Diffstat (limited to 'test/CodeGen/builtins-nvptx.c')
-rw-r--r-- | test/CodeGen/builtins-nvptx.c | 122 |
1 files changed, 109 insertions, 13 deletions
diff --git a/test/CodeGen/builtins-nvptx.c b/test/CodeGen/builtins-nvptx.c index 5f91f7ad3b0bb..ebf20673ddb48 100644 --- a/test/CodeGen/builtins-nvptx.c +++ b/test/CodeGen/builtins-nvptx.c @@ -1,8 +1,13 @@ // REQUIRES: nvptx-registered-target -// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s -int read_tid() { +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __shared__ __attribute__((shared)) +#define __constant__ __attribute__((constant)) + +__device__ int read_tid() { // CHECK: call i32 @llvm.ptx.read.tid.x() // CHECK: call i32 @llvm.ptx.read.tid.y() @@ -18,7 +23,7 @@ int read_tid() { } -int read_ntid() { +__device__ int read_ntid() { // CHECK: call i32 @llvm.ptx.read.ntid.x() // CHECK: call i32 @llvm.ptx.read.ntid.y() @@ -34,7 +39,7 @@ int read_ntid() { } -int read_ctaid() { +__device__ int read_ctaid() { // CHECK: call i32 @llvm.ptx.read.ctaid.x() // CHECK: call i32 @llvm.ptx.read.ctaid.y() @@ -50,7 +55,7 @@ int read_ctaid() { } -int read_nctaid() { +__device__ int read_nctaid() { // CHECK: call i32 @llvm.ptx.read.nctaid.x() // CHECK: call i32 @llvm.ptx.read.nctaid.y() @@ -66,7 +71,7 @@ int read_nctaid() { } -int read_ids() { +__device__ int read_ids() { // CHECK: call i32 @llvm.ptx.read.laneid() // CHECK: call i32 @llvm.ptx.read.warpid() @@ -86,7 +91,7 @@ int read_ids() { } -int read_lanemasks() { +__device__ int read_lanemasks() { // CHECK: call i32 @llvm.ptx.read.lanemask.eq() // CHECK: call i32 @llvm.ptx.read.lanemask.le() @@ -104,8 +109,7 @@ int read_lanemasks() { } - -long read_clocks() { +__device__ long read_clocks() { // CHECK: call i32 @llvm.ptx.read.clock() // CHECK: call i64 @llvm.ptx.read.clock64() @@ -117,7 +121,7 @@ long read_clocks() { } -int read_pms() { +__device__ int read_pms() { // CHECK: call i32 @llvm.ptx.read.pm0() // CHECK: call i32 @llvm.ptx.read.pm1() @@ -133,7 +137,7 @@ int read_pms() { } -void sync() { +__device__ void sync() { // CHECK: call void @llvm.ptx.bar.sync(i32 0) @@ -146,7 +150,7 @@ void sync() { // The idea is not to test all intrinsics, just that Clang is recognizing the // builtins defined in BuiltinsNVPTX.def -void nvvm_math(float f1, float f2, double d1, double d2) { +__device__ void nvvm_math(float f1, float f2, double d1, double d2) { // CHECK: call float @llvm.nvvm.fmax.f float t1 = __nvvm_fmax_f(f1, f2); // CHECK: call float @llvm.nvvm.fmin.f @@ -176,3 +180,95 @@ void nvvm_math(float f1, float f2, double d1, double d2) { // CHECK: call void @llvm.nvvm.barrier0() __nvvm_bar0(); } + +__device__ int di; +__shared__ int si; +__device__ long dl; +__shared__ long sl; +__device__ long long dll; +__shared__ long long sll; + +// Check for atomic intrinsics +// CHECK-LABEL: nvvm_atom +__device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l, + long long *llp, long long ll) { + // CHECK: atomicrmw add + __nvvm_atom_add_gen_i(ip, i); + // CHECK: atomicrmw add + __nvvm_atom_add_gen_l(&dl, l); + // CHECK: atomicrmw add + __nvvm_atom_add_gen_ll(&sll, ll); + + // CHECK: atomicrmw sub + __nvvm_atom_sub_gen_i(ip, i); + // CHECK: atomicrmw sub + __nvvm_atom_sub_gen_l(&dl, l); + // CHECK: atomicrmw sub + __nvvm_atom_sub_gen_ll(&sll, ll); + + // CHECK: atomicrmw and + __nvvm_atom_and_gen_i(ip, i); + // CHECK: atomicrmw and + __nvvm_atom_and_gen_l(&dl, l); + // CHECK: atomicrmw and + __nvvm_atom_and_gen_ll(&sll, ll); + + // CHECK: atomicrmw or + __nvvm_atom_or_gen_i(ip, i); + // CHECK: atomicrmw or + __nvvm_atom_or_gen_l(&dl, l); + // CHECK: atomicrmw or + __nvvm_atom_or_gen_ll(&sll, ll); + + // CHECK: atomicrmw xor + __nvvm_atom_xor_gen_i(ip, i); + // CHECK: atomicrmw xor + __nvvm_atom_xor_gen_l(&dl, l); + // CHECK: atomicrmw xor + __nvvm_atom_xor_gen_ll(&sll, ll); + + // CHECK: atomicrmw xchg + __nvvm_atom_xchg_gen_i(ip, i); + // CHECK: atomicrmw xchg + __nvvm_atom_xchg_gen_l(&dl, l); + // CHECK: atomicrmw xchg + __nvvm_atom_xchg_gen_ll(&sll, ll); + + // CHECK: atomicrmw max + __nvvm_atom_max_gen_i(ip, i); + // CHECK: atomicrmw max + __nvvm_atom_max_gen_ui((unsigned int *)ip, i); + // CHECK: atomicrmw max + __nvvm_atom_max_gen_l(&dl, l); + // CHECK: atomicrmw max + __nvvm_atom_max_gen_ul((unsigned long *)&dl, l); + // CHECK: atomicrmw max + __nvvm_atom_max_gen_ll(&sll, ll); + // CHECK: atomicrmw max + __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll); + + // CHECK: atomicrmw min + __nvvm_atom_min_gen_i(ip, i); + // CHECK: atomicrmw min + __nvvm_atom_min_gen_ui((unsigned int *)ip, i); + // CHECK: atomicrmw min + __nvvm_atom_min_gen_l(&dl, l); + // CHECK: atomicrmw min + __nvvm_atom_min_gen_ul((unsigned long *)&dl, l); + // CHECK: atomicrmw min + __nvvm_atom_min_gen_ll(&sll, ll); + // CHECK: atomicrmw min + __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll); + + // CHECK: cmpxchg + __nvvm_atom_cas_gen_i(ip, 0, i); + // CHECK: cmpxchg + __nvvm_atom_cas_gen_l(&dl, 0, l); + // CHECK: cmpxchg + __nvvm_atom_cas_gen_ll(&sll, 0, ll); + + // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32 + __nvvm_atom_add_gen_f(fp, f); + + // CHECK: ret +} |