aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGenCUDA
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGenCUDA')
-rw-r--r-- test/CodeGenCUDA/builtins-amdgcn.cu | 18
-rw-r--r-- test/CodeGenCUDA/device-stub.cu | 39
-rw-r--r-- test/CodeGenCUDA/device-var-init.cu | 203
-rw-r--r-- test/CodeGenCUDA/link-device-bitcode.cu | 10
-rw-r--r-- test/CodeGenCUDA/propagate-metadata.cu | 8
-rw-r--r-- test/CodeGenCUDA/usual-deallocators.cu | 133
6 files changed, 307 insertions, 104 deletions
diff --git a/test/CodeGenCUDA/builtins-amdgcn.cu b/test/CodeGenCUDA/builtins-amdgcn.cu
new file mode 100644
index 000000000000..82a666717ac7
--- /dev/null
+++ b/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// CHECK-LABEL: @_Z16use_dispatch_ptrPi(
+// CHECK: [[PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+// CHECK: %{{.*}} = addrspacecast i8 addrspace(4)* [[PTR]] to i8 addrspace(4)**
+__global__ void use_dispatch_ptr(int* out) {  // Kernel that reads through the pointer returned by the dispatch-ptr builtin.
+ const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr();  // builtin result reinterpreted as const int*
+ *out = *dispatch_ptr;  // copy the first int it points at into *out
+}
+
+// CHECK-LABEL: @_Z12test_ds_fmaxf(
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float %{{[^,]*}}, i32 0, i32 0, i1 false)
+__global__
+void test_ds_fmax(float src) {  // Kernel that calls __builtin_amdgcn_ds_fmaxf on a __shared__ float.
+ __shared__ float shared;
+ volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false);  // builtin result is stored to a volatile local
+}
diff --git a/test/CodeGenCUDA/device-stub.cu b/test/CodeGenCUDA/device-stub.cu
index 716381b7a826..ea45c391d20c 100644
--- a/test/CodeGenCUDA/device-stub.cu
+++ b/test/CodeGenCUDA/device-stub.cu
@@ -6,22 +6,22 @@
// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,CUDANOGLOBALS
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - \
+// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - \
// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,RDC,CUDA,CUDARDC
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - -x hip \
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP
+// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,NORDC,HIP,HIPNEF
#include "Inputs/cuda.h"
@@ -42,13 +42,20 @@ int host_var;
// ALL-DAG: @ext_host_var = external global i32
extern int ext_host_var;
-// Shadows for external device-side variables are *definitions* of
-// those variables.
-// ALL-DAG: @ext_device_var = internal global i32
+// external device-side variables -> extern references to their shadows.
+// ALL-DAG: @ext_device_var = external global i32
extern __device__ int ext_device_var;
-// ALL-DAG: @ext_device_var = internal global i32
+// ALL-DAG: @ext_constant_var = external global i32
extern __constant__ int ext_constant_var;
+// external device-side variables with definitions should generate
+// definitions for the shadows.
+// ALL-DAG: @ext_device_var_def = internal global i32 undef,
+extern __device__ int ext_device_var_def;
+__device__ int ext_device_var_def = 1;
+// ALL-DAG: @ext_constant_var_def = internal global i32 undef,
+__constant__ int ext_constant_var_def = 2;
+
void use_pointers() {
int *p;
p = &device_var;
@@ -64,8 +71,9 @@ void use_pointers() {
// * constant unnamed string with the kernel name
// ALL: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
// * constant unnamed string with GPU binary
-// HIP: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
// CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
+// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
+// HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
// CUDANORDC-SAME: section ".nv_fatbin", align 8
// CUDARDC-SAME: section "__nv_relfatbin", align 8
// * constant struct that wraps GPU binary
@@ -74,13 +82,14 @@ void use_pointers() {
// CUDA-SAME: { i32 1180844977, i32 1,
// HIP-SAME: { i32 1212764230, i32 1,
// CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
-// HIP-SAME: i8* @[[FATBIN]],
+// HIPEF-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
+// HIPNEF-SAME: i8* @[[FATBIN]],
// ALL-SAME: i8* null }
// CUDA-SAME: section ".nvFatBinSegment"
// HIP-SAME: section ".hipFatBinSegment"
// * variable to save GPU binary handle after initialization
// CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
-// HIP: @__[[PREFIX]]_gpubin_handle = linkonce global i8** null
+// HIPNEF: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null
// * constant unnamed string with NVModuleID
// RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
@@ -112,8 +121,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc
// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0
// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0
-// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0
+// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{.*}}i32 0, i32 4, i32 0, i32 0
+// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{.*}}i32 0, i32 4, i32 1, i32 0
// ALL: ret void
// Test that we've built a constructor.
@@ -157,7 +166,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// device-side globals, but we still need to register GPU binary.
// Skip GPU binary string first.
// CUDANOGLOBALS: @{{.*}} = private constant{{.*}}
-// HIPNOGLOBALS: @{{.*}} = external constant{{.*}}
+// HIPNOGLOBALS: @{{.*}} = internal constant{{.*}}
// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
diff --git a/test/CodeGenCUDA/device-var-init.cu b/test/CodeGenCUDA/device-var-init.cu
index f96e42d9711c..af42e698cfe9 100644
--- a/test/CodeGenCUDA/device-var-init.cu
+++ b/test/CodeGenCUDA/device-var-init.cu
@@ -5,10 +5,12 @@
// variables, but accept empty constructors allowed by CUDA.
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
-// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,NVPTX %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -std=c++11 \
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=HOST %s
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \
-// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,AMDGCN %s
#ifdef __clang__
#include "Inputs/cuda.h"
@@ -18,105 +20,140 @@
#include "Inputs/cuda-initializers.h"
__device__ int d_v;
-// CHECK: @d_v = addrspace(1) externally_initialized global i32 0,
+// DEVICE: @d_v = addrspace(1) externally_initialized global i32 0,
+// HOST: @d_v = internal global i32 undef,
__shared__ int s_v;
-// CHECK: @s_v = addrspace(3) global i32 undef,
+// DEVICE: @s_v = addrspace(3) global i32 undef,
+// HOST: @s_v = internal global i32 undef,
__constant__ int c_v;
-// CHECK: addrspace(4) externally_initialized global i32 0,
+// DEVICE: addrspace(4) externally_initialized global i32 0,
+// HOST: @c_v = internal global i32 undef,
__device__ int d_v_i = 1;
-// CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1,
+// DEVICE: @d_v_i = addrspace(1) externally_initialized global i32 1,
+// HOST: @d_v_i = internal global i32 undef,
// trivial constructor -- allowed
__device__ T d_t;
-// CHECK: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer
+// DEVICE: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer
+// HOST: @d_t = internal global %struct.T undef,
__shared__ T s_t;
-// CHECK: @s_t = addrspace(3) global %struct.T undef,
+// DEVICE: @s_t = addrspace(3) global %struct.T undef,
+// HOST: @s_t = internal global %struct.T undef,
__constant__ T c_t;
-// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer,
+// DEVICE: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer,
+// HOST: @c_t = internal global %struct.T undef,
__device__ T d_t_i = {2};
-// CHECK: @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 },
+// DEVICE: @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 },
+// HOST: @d_t_i = internal global %struct.T undef,
__constant__ T c_t_i = {2};
-// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 },
+// DEVICE: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 },
+// HOST: @c_t_i = internal global %struct.T undef,
// empty constructor
__device__ EC d_ec;
-// CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer,
+// DEVICE: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer,
+// HOST: @d_ec = internal global %struct.EC undef,
__shared__ EC s_ec;
-// CHECK: @s_ec = addrspace(3) global %struct.EC undef,
+// DEVICE: @s_ec = addrspace(3) global %struct.EC undef,
+// HOST: @s_ec = internal global %struct.EC undef,
__constant__ EC c_ec;
-// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer,
+// DEVICE: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer,
+// HOST: @c_ec = internal global %struct.EC undef,
// empty destructor
__device__ ED d_ed;
-// CHECK: @d_ed = addrspace(1) externally_initialized global %struct.ED zeroinitializer,
+// DEVICE: @d_ed = addrspace(1) externally_initialized global %struct.ED zeroinitializer,
+// HOST: @d_ed = internal global %struct.ED undef,
__shared__ ED s_ed;
-// CHECK: @s_ed = addrspace(3) global %struct.ED undef,
+// DEVICE: @s_ed = addrspace(3) global %struct.ED undef,
+// HOST: @s_ed = internal global %struct.ED undef,
__constant__ ED c_ed;
-// CHECK: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer,
+// DEVICE: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer,
+// HOST: @c_ed = internal global %struct.ED undef,
__device__ ECD d_ecd;
-// CHECK: @d_ecd = addrspace(1) externally_initialized global %struct.ECD zeroinitializer,
+// DEVICE: @d_ecd = addrspace(1) externally_initialized global %struct.ECD zeroinitializer,
+// HOST: @d_ecd = internal global %struct.ECD undef,
__shared__ ECD s_ecd;
-// CHECK: @s_ecd = addrspace(3) global %struct.ECD undef,
+// DEVICE: @s_ecd = addrspace(3) global %struct.ECD undef,
+// HOST: @s_ecd = internal global %struct.ECD undef,
__constant__ ECD c_ecd;
-// CHECK: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer,
+// DEVICE: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer,
+// HOST: @c_ecd = internal global %struct.ECD undef,
// empty templated constructor -- allowed with no arguments
__device__ ETC d_etc;
-// CHECK: @d_etc = addrspace(1) externally_initialized global %struct.ETC zeroinitializer,
+// DEVICE: @d_etc = addrspace(1) externally_initialized global %struct.ETC zeroinitializer,
+// HOST: @d_etc = internal global %struct.ETC undef,
__shared__ ETC s_etc;
-// CHECK: @s_etc = addrspace(3) global %struct.ETC undef,
+// DEVICE: @s_etc = addrspace(3) global %struct.ETC undef,
+// HOST: @s_etc = internal global %struct.ETC undef,
__constant__ ETC c_etc;
-// CHECK: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer,
+// DEVICE: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer,
+// HOST: @c_etc = internal global %struct.ETC undef,
__device__ NCFS d_ncfs;
-// CHECK: @d_ncfs = addrspace(1) externally_initialized global %struct.NCFS { i32 3 }
+// DEVICE: @d_ncfs = addrspace(1) externally_initialized global %struct.NCFS { i32 3 }
+// HOST: @d_ncfs = internal global %struct.NCFS undef,
__constant__ NCFS c_ncfs;
-// CHECK: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 }
+// DEVICE: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 }
+// HOST: @c_ncfs = internal global %struct.NCFS undef,
// Regular base class -- allowed
__device__ T_B_T d_t_b_t;
-// CHECK: @d_t_b_t = addrspace(1) externally_initialized global %struct.T_B_T zeroinitializer,
+// DEVICE: @d_t_b_t = addrspace(1) externally_initialized global %struct.T_B_T zeroinitializer,
+// HOST: @d_t_b_t = internal global %struct.T_B_T undef,
__shared__ T_B_T s_t_b_t;
-// CHECK: @s_t_b_t = addrspace(3) global %struct.T_B_T undef,
+// DEVICE: @s_t_b_t = addrspace(3) global %struct.T_B_T undef,
+// HOST: @s_t_b_t = internal global %struct.T_B_T undef,
__constant__ T_B_T c_t_b_t;
-// CHECK: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer,
+// DEVICE: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer,
+// HOST: @c_t_b_t = internal global %struct.T_B_T undef,
// Encapsulated object of allowed class -- allowed
__device__ T_F_T d_t_f_t;
-// CHECK: @d_t_f_t = addrspace(1) externally_initialized global %struct.T_F_T zeroinitializer,
+// DEVICE: @d_t_f_t = addrspace(1) externally_initialized global %struct.T_F_T zeroinitializer,
+// HOST: @d_t_f_t = internal global %struct.T_F_T undef,
__shared__ T_F_T s_t_f_t;
-// CHECK: @s_t_f_t = addrspace(3) global %struct.T_F_T undef,
+// DEVICE: @s_t_f_t = addrspace(3) global %struct.T_F_T undef,
+// HOST: @s_t_f_t = internal global %struct.T_F_T undef,
__constant__ T_F_T c_t_f_t;
-// CHECK: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer,
+// DEVICE: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer,
+// HOST: @c_t_f_t = internal global %struct.T_F_T undef,
// array of allowed objects -- allowed
__device__ T_FA_T d_t_fa_t;
-// CHECK: @d_t_fa_t = addrspace(1) externally_initialized global %struct.T_FA_T zeroinitializer,
+// DEVICE: @d_t_fa_t = addrspace(1) externally_initialized global %struct.T_FA_T zeroinitializer,
+// HOST: @d_t_fa_t = internal global %struct.T_FA_T undef,
__shared__ T_FA_T s_t_fa_t;
-// CHECK: @s_t_fa_t = addrspace(3) global %struct.T_FA_T undef,
+// DEVICE: @s_t_fa_t = addrspace(3) global %struct.T_FA_T undef,
+// HOST: @s_t_fa_t = internal global %struct.T_FA_T undef,
__constant__ T_FA_T c_t_fa_t;
-// CHECK: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer,
+// DEVICE: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer,
+// HOST: @c_t_fa_t = internal global %struct.T_FA_T undef,
// Calling empty base class initializer is OK
__device__ EC_I_EC d_ec_i_ec;
-// CHECK: @d_ec_i_ec = addrspace(1) externally_initialized global %struct.EC_I_EC zeroinitializer,
+// DEVICE: @d_ec_i_ec = addrspace(1) externally_initialized global %struct.EC_I_EC zeroinitializer,
+// HOST: @d_ec_i_ec = internal global %struct.EC_I_EC undef,
__shared__ EC_I_EC s_ec_i_ec;
-// CHECK: @s_ec_i_ec = addrspace(3) global %struct.EC_I_EC undef,
+// DEVICE: @s_ec_i_ec = addrspace(3) global %struct.EC_I_EC undef,
+// HOST: @s_ec_i_ec = internal global %struct.EC_I_EC undef,
__constant__ EC_I_EC c_ec_i_ec;
-// CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
+// DEVICE: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
+// HOST: @c_ec_i_ec = internal global %struct.EC_I_EC undef,
-// CHECK: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef
-// CHECK: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef
+// DEVICE: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef
+// DEVICE: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef
-// CHECK: @_ZZ2dfvE11const_array = internal addrspace(4) constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5]
-// CHECK: @_ZZ2dfvE9const_int = internal addrspace(4) constant i32 123
+// DEVICE: @_ZZ2dfvE11const_array = internal addrspace(4) constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5]
+// DEVICE: @_ZZ2dfvE9const_int = internal addrspace(4) constant i32 123
// We should not emit global initializers for device-side variables.
-// CHECK-NOT: @__cxx_global_var_init
+// DEVICE-NOT: @__cxx_global_var_init
// Make sure that initialization restrictions do not apply to local
// variables.
@@ -171,90 +208,90 @@ __device__ void df() {
// AMDGCN: %[[t_fa_ned:.*]] = addrspacecast %struct.T_FA_NED addrspace(5)* %t_fa_ned to %struct.T_FA_NED*
T t;
- // CHECK-NOT: call
+ // DEVICE-NOT: call
EC ec;
- // CHECK: call void @_ZN2ECC1Ev(%struct.EC* %[[ec]])
+ // DEVICE: call void @_ZN2ECC1Ev(%struct.EC* %[[ec]])
ED ed;
- // CHECK-NOT: call
+ // DEVICE-NOT: call
ECD ecd;
- // CHECK: call void @_ZN3ECDC1Ev(%struct.ECD* %[[ecd]])
+ // DEVICE: call void @_ZN3ECDC1Ev(%struct.ECD* %[[ecd]])
ETC etc;
- // CHECK: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %[[etc]])
+ // DEVICE: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %[[etc]])
UC uc;
// undefined constructor -- not allowed
- // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %[[uc]])
+ // DEVICE: call void @_ZN2UCC1Ev(%struct.UC* %[[uc]])
UD ud;
// undefined destructor -- not allowed
- // CHECK-NOT: call
+ // DEVICE-NOT: call
ECI eci;
// empty constructor w/ initializer list -- not allowed
- // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %[[eci]])
+ // DEVICE: call void @_ZN3ECIC1Ev(%struct.ECI* %[[eci]])
NEC nec;
// non-empty constructor -- not allowed
- // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %[[nec]])
+ // DEVICE: call void @_ZN3NECC1Ev(%struct.NEC* %[[nec]])
// non-empty destructor -- not allowed
NED ned;
// no-constructor, virtual method -- not allowed
- // CHECK: call void @_ZN3NCVC1Ev(%struct.NCV* %[[ncv]])
+ // DEVICE: call void @_ZN3NCVC1Ev(%struct.NCV* %[[ncv]])
NCV ncv;
- // CHECK-NOT: call
+ // DEVICE-NOT: call
VD vd;
- // CHECK: call void @_ZN2VDC1Ev(%struct.VD* %[[vd]])
+ // DEVICE: call void @_ZN2VDC1Ev(%struct.VD* %[[vd]])
NCF ncf;
- // CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %[[ncf]])
+ // DEVICE: call void @_ZN3NCFC1Ev(%struct.NCF* %[[ncf]])
NCFS ncfs;
- // CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %[[ncfs]])
+ // DEVICE: call void @_ZN4NCFSC1Ev(%struct.NCFS* %[[ncfs]])
UTC utc;
- // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %[[utc]])
+ // DEVICE: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %[[utc]])
NETC netc;
- // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %[[netc]])
+ // DEVICE: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %[[netc]])
T_B_T t_b_t;
- // CHECK-NOT: call
+ // DEVICE-NOT: call
T_F_T t_f_t;
- // CHECK-NOT: call
+ // DEVICE-NOT: call
T_FA_T t_fa_t;
- // CHECK-NOT: call
+ // DEVICE-NOT: call
EC_I_EC ec_i_ec;
- // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %[[ec_i_ec]])
+ // DEVICE: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %[[ec_i_ec]])
EC_I_EC1 ec_i_ec1;
- // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %[[ec_i_ec1]])
+ // DEVICE: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %[[ec_i_ec1]])
T_V_T t_v_t;
- // CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %[[t_v_t]])
+ // DEVICE: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %[[t_v_t]])
T_B_NEC t_b_nec;
- // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %[[t_b_nec]])
+ // DEVICE: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %[[t_b_nec]])
T_F_NEC t_f_nec;
- // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %[[t_f_nec]])
+ // DEVICE: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %[[t_f_nec]])
T_FA_NEC t_fa_nec;
- // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %[[t_fa_nec]])
+ // DEVICE: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %[[t_fa_nec]])
T_B_NED t_b_ned;
- // CHECK-NOT: call
+ // DEVICE-NOT: call
T_F_NED t_f_ned;
- // CHECK-NOT: call
+ // DEVICE-NOT: call
T_FA_NED t_fa_ned;
- // CHECK-NOT: call
+ // DEVICE-NOT: call
static __shared__ EC s_ec;
- // CHECK-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*))
+ // DEVICE-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*))
static __shared__ ETC s_etc;
- // CHECK-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*))
+ // DEVICE-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*))
static const int const_array[] = {1, 2, 3, 4, 5};
static const int const_int = 123;
// anchor point separating constructors and destructors
- df(); // CHECK: call void @_Z2dfv()
+ df(); // DEVICE: call void @_Z2dfv()
// Verify that we only call non-empty destructors
- // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %[[t_fa_ned]])
- // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %[[t_f_ned]])
- // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %[[t_b_ned]])
- // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %[[vd]])
- // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %[[ned]])
- // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %[[ud]])
- // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %[[ecd]])
- // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %[[ed]])
+ // DEVICE-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %[[t_fa_ned]])
+ // DEVICE-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %[[t_f_ned]])
+ // DEVICE-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %[[t_b_ned]])
+ // DEVICE-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %[[vd]])
+ // DEVICE-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %[[ned]])
+ // DEVICE-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %[[ud]])
+ // DEVICE-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %[[ecd]])
+ // DEVICE-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %[[ed]])
- // CHECK-NEXT: ret void
+ // DEVICE-NEXT: ret void
}
// We should not emit global init function.
-// CHECK-NOT: @_GLOBAL__sub_I
+// DEVICE-NOT: @_GLOBAL__sub_I
diff --git a/test/CodeGenCUDA/link-device-bitcode.cu b/test/CodeGenCUDA/link-device-bitcode.cu
index b307838ae82d..69dc051355de 100644
--- a/test/CodeGenCUDA/link-device-bitcode.cu
+++ b/test/CodeGenCUDA/link-device-bitcode.cu
@@ -11,13 +11,19 @@
//
// Make sure function in device-code gets linked in and internalized.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
+// RUN: -mlink-builtin-bitcode %t.bc -emit-llvm \
+// RUN: -disable-llvm-passes -o - %s \
+// RUN: | FileCheck %s -check-prefix CHECK-IR
+
+// Make sure legacy flag name works
+// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \
// RUN: -disable-llvm-passes -o - %s \
// RUN: | FileCheck %s -check-prefix CHECK-IR
//
// Make sure we can link two bitcode files.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
-// RUN: -mlink-cuda-bitcode %t.bc -mlink-cuda-bitcode %t-2.bc \
+// RUN: -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \
// RUN: -emit-llvm -disable-llvm-passes -o - %s \
// RUN: | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
//
@@ -30,7 +36,7 @@
//
// Make sure NVVMReflect pass is enabled in NVPTX back-end.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
-// RUN: -mlink-cuda-bitcode %t.bc -S -o /dev/null %s \
+// RUN: -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \
// RUN: -mllvm -debug-pass=Structure 2>&1 \
// RUN: | FileCheck %s -check-prefix CHECK-REFLECT
diff --git a/test/CodeGenCUDA/propagate-metadata.cu b/test/CodeGenCUDA/propagate-metadata.cu
index 1616cb933748..773dd8afba81 100644
--- a/test/CodeGenCUDA/propagate-metadata.cu
+++ b/test/CodeGenCUDA/propagate-metadata.cu
@@ -1,5 +1,5 @@
// Check that when we link a bitcode module into a file using
-// -mlink-cuda-bitcode, we apply the same attributes to the functions in that
+// -mlink-builtin-bitcode, we apply the same attributes to the functions in that
// bitcode module as we apply to functions we generate.
//
// In particular, we check that ftz and unsafe-math are propagated into the
@@ -14,17 +14,17 @@
// RUN: %clang_cc1 -x c++ -emit-llvm-bc -ftrapping-math -DLIB \
// RUN: %s -o %t.bc -triple nvptx-unknown-unknown
-// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc -o - \
+// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \
// RUN: -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \
// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST
-// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc \
+// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \
// RUN: -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \
// RUN: -fcuda-is-device -triple nvptx-unknown-unknown \
// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ \
// RUN: --check-prefix=NOFAST
-// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc \
+// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \
// RUN: -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \
// RUN: -fcuda-is-device -menable-unsafe-fp-math -triple nvptx-unknown-unknown \
// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
diff --git a/test/CodeGenCUDA/usual-deallocators.cu b/test/CodeGenCUDA/usual-deallocators.cu
new file mode 100644
index 000000000000..2d97c8c9f7de
--- /dev/null
+++ b/test/CodeGenCUDA/usual-deallocators.cu
@@ -0,0 +1,133 @@
+// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \
+// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,DEVICE
+// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown \
+// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,HOST
+// RUN: %clang_cc1 %s --std=c++17 -triple nvptx-unknown-unknown -fcuda-is-device \
+// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,DEVICE
+// RUN: %clang_cc1 %s --std=c++17 -triple nvptx-unknown-unknown \
+// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,HOST
+
+#include "Inputs/cuda.h"
+extern "C" __host__ void host_fn();  // host-side marker; called from the __host__ operator delete bodies below
+extern "C" __device__ void dev_fn();  // device-side marker; called from the __device__ operator delete bodies below
+extern "C" __host__ __device__ void hd_fn();  // host+device declaration; not referenced by the code visible in this file
+
+struct H1D1 {  // host: delete(void*); device: delete(void*)
+ __host__ void operator delete(void *) { host_fn(); };
+ __device__ void operator delete(void *) { dev_fn(); };
+};
+
+struct H1D2 {  // host: delete(void*); device: delete(void*, size)
+ __host__ void operator delete(void *) { host_fn(); };
+ __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); };
+};
+
+struct H2D1 {  // host: delete(void*, size); device: delete(void*)
+ __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); };
+ __device__ void operator delete(void *) { dev_fn(); };
+};
+
+struct H2D2 {  // host: delete(void*, size); device: delete(void*, size)
+ __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); };
+ __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); };
+};
+
+struct H1D1D2 {  // host: delete(void*); device: both delete(void*) and delete(void*, size)
+ __host__ void operator delete(void *) { host_fn(); };
+ __device__ void operator delete(void *) { dev_fn(); };
+ __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); };
+};
+
+struct H1H2D1 {  // host: both delete(void*) and delete(void*, size); device: delete(void*)
+ __host__ void operator delete(void *) { host_fn(); };
+ __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); };
+ __device__ void operator delete(void *) { dev_fn(); };
+};
+
+struct H1H2D2 {  // host: both delete(void*) and delete(void*, size); device: delete(void*, size)
+ __host__ void operator delete(void *) { host_fn(); };
+ __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); };
+ __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); };
+};
+
+struct H1H2D1D2 {  // host and device each provide both delete(void*) and delete(void*, size)
+ __host__ void operator delete(void *) { host_fn(); };
+ __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); };
+ __device__ void operator delete(void *) { dev_fn(); };
+ __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); };
+};
+
+
+template <typename T>
+__host__ __device__ void test_hd(void *p) {  // deletes p as a T, so a member operator delete of T is the one invoked
+ T *t = (T *)p;  // reinterpret the opaque pointer as T*
+ delete t;  // delete-expression on T*; the test below verifies which overload gets called
+}
+
+// Make sure we call the right variant of usual deallocator.
+__host__ __device__ void tests_hd(void *t) {  // instantiates test_hd once per struct declared above
+ // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI4H1D1EvPv
+ // COMMON: call void @_ZN4H1D1dlEPv
+ test_hd<H1D1>(t);
+ // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI4H1D2EvPv
+ // DEVICE: call void @_ZN4H1D2dlEPvj(i8* {{.*}}, i32 1)
+ // HOST: call void @_ZN4H1D2dlEPv(i8* {{.*}})
+ test_hd<H1D2>(t);
+ // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI4H2D1EvPv
+ // DEVICE: call void @_ZN4H2D1dlEPv(i8* {{.*}})
+ // HOST: call void @_ZN4H2D1dlEPvj(i8* {{.*}}, i32 1)
+ test_hd<H2D1>(t);
+ // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI4H2D2EvPv
+ // COMMON: call void @_ZN4H2D2dlEPvj(i8* {{.*}}, i32 1)
+ test_hd<H2D2>(t);
+ // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI6H1D1D2EvPv
+ // COMMON: call void @_ZN6H1D1D2dlEPv(i8* {{.*}})
+ test_hd<H1D1D2>(t);
+ // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI6H1H2D1EvPv
+ // COMMON: call void @_ZN6H1H2D1dlEPv(i8* {{.*}})
+ test_hd<H1H2D1>(t);
+ // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI6H1H2D2EvPv
+ // DEVICE: call void @_ZN6H1H2D2dlEPvj(i8* {{.*}}, i32 1)
+ // HOST: call void @_ZN6H1H2D2dlEPv(i8* {{.*}})
+ test_hd<H1H2D2>(t);
+ // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI8H1H2D1D2EvPv
+ // COMMON: call void @_ZN8H1H2D1D2dlEPv(i8* {{.*}})
+ test_hd<H1H2D1D2>(t);
+}
+
+// Make sure we've picked deallocator for the correct side of compilation.
+
+// COMMON-LABEL: define linkonce_odr void @_ZN4H1D1dlEPv(i8*)
+// DEVICE: call void @dev_fn()
+// HOST: call void @host_fn()
+
+// DEVICE-LABEL: define linkonce_odr void @_ZN4H1D2dlEPvj(i8*, i32)
+// DEVICE: call void @dev_fn()
+// HOST-LABEL: define linkonce_odr void @_ZN4H1D2dlEPv(i8*)
+// HOST: call void @host_fn()
+
+// DEVICE-LABEL: define linkonce_odr void @_ZN4H2D1dlEPv(i8*)
+// DEVICE: call void @dev_fn()
+// HOST-LABEL: define linkonce_odr void @_ZN4H2D1dlEPvj(i8*, i32)
+// HOST: call void @host_fn()
+
+// COMMON-LABEL: define linkonce_odr void @_ZN4H2D2dlEPvj(i8*, i32)
+// DEVICE: call void @dev_fn()
+// HOST: call void @host_fn()
+
+// COMMON-LABEL: define linkonce_odr void @_ZN6H1D1D2dlEPv(i8*)
+// DEVICE: call void @dev_fn()
+// HOST: call void @host_fn()
+
+// COMMON-LABEL: define linkonce_odr void @_ZN6H1H2D1dlEPv(i8*)
+// DEVICE: call void @dev_fn()
+// HOST: call void @host_fn()
+
+// DEVICE-LABEL: define linkonce_odr void @_ZN6H1H2D2dlEPvj(i8*, i32)
+// DEVICE: call void @dev_fn()
+// HOST-LABEL: define linkonce_odr void @_ZN6H1H2D2dlEPv(i8*)
+// HOST: call void @host_fn()
+
+// COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(i8*)
+// DEVICE: call void @dev_fn()
+// HOST: call void @host_fn()