1 files changed, 109 insertions, 13 deletions
diff --git a/test/CodeGen/builtins-nvptx.c b/test/CodeGen/builtins-nvptx.c
index 5f91f7ad3b0bb..ebf20673ddb48 100644
--- a/test/CodeGen/builtins-nvptx.c
+++ b/test/CodeGen/builtins-nvptx.c
@@ -1,8 +1,13 @@
 // REQUIRES: nvptx-registered-target
-// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | FileCheck %s
 
-int read_tid() {
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+__device__ int read_tid() {
 
 // CHECK: call i32 @llvm.ptx.read.tid.x()
 // CHECK: call i32 @llvm.ptx.read.tid.y()
@@ -18,7 +23,7 @@ int read_tid() {
 
 }
 
-int read_ntid() {
+__device__ int read_ntid() {
 
 // CHECK: call i32 @llvm.ptx.read.ntid.x()
 // CHECK: call i32 @llvm.ptx.read.ntid.y()
@@ -34,7 +39,7 @@ int read_ntid() {
 
 }
 
-int read_ctaid() {
+__device__ int read_ctaid() {
 
 // CHECK: call i32 @llvm.ptx.read.ctaid.x()
 // CHECK: call i32 @llvm.ptx.read.ctaid.y()
@@ -50,7 +55,7 @@ int read_ctaid() {
 
 }
 
-int read_nctaid() {
+__device__ int read_nctaid() {
 
 // CHECK: call i32 @llvm.ptx.read.nctaid.x()
 // CHECK: call i32 @llvm.ptx.read.nctaid.y()
@@ -66,7 +71,7 @@ int read_nctaid() {
 
 }
 
-int read_ids() {
+__device__ int read_ids() {
 
 // CHECK: call i32 @llvm.ptx.read.laneid()
 // CHECK: call i32 @llvm.ptx.read.warpid()
@@ -86,7 +91,7 @@ int read_ids() {
 
 }
 
-int read_lanemasks() {
+__device__ int read_lanemasks() {
 
 // CHECK: call i32 @llvm.ptx.read.lanemask.eq()
 // CHECK: call i32 @llvm.ptx.read.lanemask.le()
@@ -104,8 +109,7 @@ int read_lanemasks() {
 
 }
 
-
-long read_clocks() {
+__device__ long read_clocks() {
 
 // CHECK: call i32 @llvm.ptx.read.clock()
 // CHECK: call i64 @llvm.ptx.read.clock64()
@@ -117,7 +121,7 @@ long read_clocks() {
 
 }
 
-int read_pms() {
+__device__ int read_pms() {
 
 // CHECK: call i32 @llvm.ptx.read.pm0()
 // CHECK: call i32 @llvm.ptx.read.pm1()
@@ -133,7 +137,7 @@ int read_pms() {
 
 }
 
-void sync() {
+__device__ void sync() {
 
 // CHECK: call void @llvm.ptx.bar.sync(i32 0)
 
@@ -146,7 +150,7 @@ void sync() {
 
 // The idea is not to test all intrinsics, just that Clang is recognizing the
 // builtins defined in BuiltinsNVPTX.def
-void nvvm_math(float f1, float f2, double d1, double d2) {
+__device__ void nvvm_math(float f1, float f2, double d1, double d2) {
 // CHECK: call float @llvm.nvvm.fmax.f
   float t1 = __nvvm_fmax_f(f1, f2);
 // CHECK: call float @llvm.nvvm.fmin.f
@@ -176,3 +180,95 @@ void nvvm_math(float f1, float f2, double d1, double d2) {
 // CHECK: call void @llvm.nvvm.barrier0()
   __nvvm_bar0();
 }
+
+__device__ int di;
+__shared__ int si;
+__device__ long dl;
+__shared__ long sl;
+__device__ long long dll;
+__shared__ long long sll;
+
+// Check for atomic intrinsics
+// CHECK-LABEL: nvvm_atom
+__device__ void nvvm_atom(float *fp, float f, int *ip, int i, long *lp, long l,
+                          long long *llp, long long ll) {
+  // CHECK: atomicrmw add
+  __nvvm_atom_add_gen_i(ip, i);
+  // CHECK: atomicrmw add
+  __nvvm_atom_add_gen_l(&dl, l);
+  // CHECK: atomicrmw add
+  __nvvm_atom_add_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw sub
+  __nvvm_atom_sub_gen_i(ip, i);
+  // CHECK: atomicrmw sub
+  __nvvm_atom_sub_gen_l(&dl, l);
+  // CHECK: atomicrmw sub
+  __nvvm_atom_sub_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw and
+  __nvvm_atom_and_gen_i(ip, i);
+  // CHECK: atomicrmw and
+  __nvvm_atom_and_gen_l(&dl, l);
+  // CHECK: atomicrmw and
+  __nvvm_atom_and_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw or
+  __nvvm_atom_or_gen_i(ip, i);
+  // CHECK: atomicrmw or
+  __nvvm_atom_or_gen_l(&dl, l);
+  // CHECK: atomicrmw or
+  __nvvm_atom_or_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw xor
+  __nvvm_atom_xor_gen_i(ip, i);
+  // CHECK: atomicrmw xor
+  __nvvm_atom_xor_gen_l(&dl, l);
+  // CHECK: atomicrmw xor
+  __nvvm_atom_xor_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw xchg
+  __nvvm_atom_xchg_gen_i(ip, i);
+  // CHECK: atomicrmw xchg
+  __nvvm_atom_xchg_gen_l(&dl, l);
+  // CHECK: atomicrmw xchg
+  __nvvm_atom_xchg_gen_ll(&sll, ll);
+
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_i(ip, i);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_ui((unsigned int *)ip, i);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_l(&dl, l);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_ul((unsigned long *)&dl, l);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_ll(&sll, ll);
+  // CHECK: atomicrmw max
+  __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll);
+
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_i(ip, i);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_ui((unsigned int *)ip, i);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_l(&dl, l);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_ul((unsigned long *)&dl, l);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_ll(&sll, ll);
+  // CHECK: atomicrmw min
+  __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);
+
+  // CHECK: cmpxchg
+  __nvvm_atom_cas_gen_i(ip, 0, i);
+  // CHECK: cmpxchg
+  __nvvm_atom_cas_gen_l(&dl, 0, l);
+  // CHECK: cmpxchg
+  __nvvm_atom_cas_gen_ll(&sll, 0, ll);
+
+  // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
+  __nvvm_atom_add_gen_f(fp, f);
+
+  // CHECK: ret
+}