diff options
Diffstat (limited to 'test/CodeGenCUDA')
| -rw-r--r-- | test/CodeGenCUDA/builtins-amdgcn.cu | 18 | ||||
| -rw-r--r-- | test/CodeGenCUDA/device-stub.cu | 39 | ||||
| -rw-r--r-- | test/CodeGenCUDA/device-var-init.cu | 203 | ||||
| -rw-r--r-- | test/CodeGenCUDA/link-device-bitcode.cu | 10 | ||||
| -rw-r--r-- | test/CodeGenCUDA/propagate-metadata.cu | 8 | ||||
| -rw-r--r-- | test/CodeGenCUDA/usual-deallocators.cu | 133 |
6 files changed, 307 insertions, 104 deletions
diff --git a/test/CodeGenCUDA/builtins-amdgcn.cu b/test/CodeGenCUDA/builtins-amdgcn.cu new file mode 100644 index 000000000000..82a666717ac7 --- /dev/null +++ b/test/CodeGenCUDA/builtins-amdgcn.cu @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s +#include "Inputs/cuda.h" + +// CHECK-LABEL: @_Z16use_dispatch_ptrPi( +// CHECK: %2 = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +// CHECK: %3 = addrspacecast i8 addrspace(4)* %2 to i8 addrspace(4)** +__global__ void use_dispatch_ptr(int* out) { + const int* dispatch_ptr = (const int*)__builtin_amdgcn_dispatch_ptr(); + *out = *dispatch_ptr; +} + +// CHECK-LABEL: @_Z12test_ds_fmaxf( +// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* @_ZZ12test_ds_fmaxfE6shared, float %2, i32 0, i32 0, i1 false) +__global__ +void test_ds_fmax(float src) { + __shared__ float shared; + volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false); +} diff --git a/test/CodeGenCUDA/device-stub.cu b/test/CodeGenCUDA/device-stub.cu index 716381b7a826..ea45c391d20c 100644 --- a/test/CodeGenCUDA/device-stub.cu +++ b/test/CodeGenCUDA/device-stub.cu @@ -6,22 +6,22 @@ // RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS \ // RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,CUDANOGLOBALS // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ -// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - \ +// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - \ // RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,RDC,CUDA,CUDARDC // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \ // RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - -x hip\ -// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP +// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \ // RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ -// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - -x hip \ -// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP +// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \ +// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\ -// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN +// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,NORDC,HIP,HIPNEF #include "Inputs/cuda.h" @@ -42,13 +42,20 @@ int host_var; // ALL-DAG: @ext_host_var = external global i32 extern int ext_host_var; -// Shadows for external device-side variables are *definitions* of -// those variables. -// ALL-DAG: @ext_device_var = internal global i32 +// external device-side variables -> extern references to their shadows. +// ALL-DAG: @ext_device_var = external global i32 extern __device__ int ext_device_var; -// ALL-DAG: @ext_device_var = internal global i32 +// ALL-DAG: @ext_device_var = external global i32 extern __constant__ int ext_constant_var; +// external device-side variables with definitions should generate +// definitions for the shadows. +// ALL-DAG: @ext_device_var_def = internal global i32 undef, +extern __device__ int ext_device_var_def; +__device__ int ext_device_var_def = 1; +// ALL-DAG: @ext_device_var_def = internal global i32 undef, +__constant__ int ext_constant_var_def = 2; + void use_pointers() { int *p; p = &device_var; @@ -64,8 +71,9 @@ void use_pointers() { // * constant unnamed string with the kernel name // ALL: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00" // * constant unnamed string with GPU binary -// HIP: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin" // CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00", +// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00", +// HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin" // CUDANORDC-SAME: section ".nv_fatbin", align 8 // CUDARDC-SAME: section "__nv_relfatbin", align 8 // * constant struct that wraps GPU binary @@ -74,13 +82,14 @@ void use_pointers() { // CUDA-SAME: { i32 1180844977, i32 1, // HIP-SAME: { i32 1212764230, i32 1, // CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0), -// HIP-SAME: i8* @[[FATBIN]], +// HIPEF-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0), +// HIPNEF-SAME: i8* @[[FATBIN]], // ALL-SAME: i8* null } // CUDA-SAME: section ".nvFatBinSegment" // HIP-SAME: section ".hipFatBinSegment" // * variable to save GPU binary handle after initialization // CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null -// HIP: @__[[PREFIX]]_gpubin_handle = linkonce global i8** null +// HIPNEF: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null // * constant unnamed string with NVModuleID // RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant // CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32 @@ -112,8 +121,8 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // ALL: call{{.*}}[[PREFIX]]RegisterFunction(i8** %0, {{.*}}kernelfunc // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0 // ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0 -// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0 +// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{.*}}i32 0, i32 4, i32 0, i32 0 +// ALL-DAG: call{{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{.*}}i32 0, i32 4, i32 1, i32 0 // ALL: ret void // Test that we've built a constructor. @@ -157,7 +166,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // device-side globals, but we still need to register GPU binary. // Skip GPU binary string first. // CUDANOGLOBALS: @{{.*}} = private constant{{.*}} -// HIPNOGLOBALS: @{{.*}} = external constant{{.*}} +// HIPNOGLOBALS: @{{.*}} = internal constant{{.*}} // NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals // NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor // NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper diff --git a/test/CodeGenCUDA/device-var-init.cu b/test/CodeGenCUDA/device-var-init.cu index f96e42d9711c..af42e698cfe9 100644 --- a/test/CodeGenCUDA/device-var-init.cu +++ b/test/CodeGenCUDA/device-var-init.cu @@ -5,10 +5,12 @@ // variables, but accept empty constructors allowed by CUDA. // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ -// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,NVPTX %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -std=c++11 \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=HOST %s // RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \ -// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,AMDGCN %s #ifdef __clang__ #include "Inputs/cuda.h" @@ -18,105 +20,140 @@ #include "Inputs/cuda-initializers.h" __device__ int d_v; -// CHECK: @d_v = addrspace(1) externally_initialized global i32 0, +// DEVICE: @d_v = addrspace(1) externally_initialized global i32 0, +// HOST: @d_v = internal global i32 undef, __shared__ int s_v; -// CHECK: @s_v = addrspace(3) global i32 undef, +// DEVICE: @s_v = addrspace(3) global i32 undef, +// HOST: @s_v = internal global i32 undef, __constant__ int c_v; -// CHECK: addrspace(4) externally_initialized global i32 0, +// DEVICE: addrspace(4) externally_initialized global i32 0, +// HOST: @c_v = internal global i32 undef, __device__ int d_v_i = 1; -// CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1, +// DEVICE: @d_v_i = addrspace(1) externally_initialized global i32 1, +// HOST: @d_v_i = internal global i32 undef, // trivial constructor -- allowed __device__ T d_t; -// CHECK: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer +// DEVICE: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer +// HOST: @d_t = internal global %struct.T undef, __shared__ T s_t; -// CHECK: @s_t = addrspace(3) global %struct.T undef, +// DEVICE: @s_t = addrspace(3) global %struct.T undef, +// HOST: @s_t = internal global %struct.T undef, __constant__ T c_t; -// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, +// DEVICE: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, +// HOST: @c_t = internal global %struct.T undef, __device__ T d_t_i = {2}; -// CHECK: @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, +// DEVICE: @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, +// HOST: @d_t_i = internal global %struct.T undef, __constant__ T c_t_i = {2}; -// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, +// DEVICE: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, +// HOST: @c_t_i = internal global %struct.T undef, // empty constructor __device__ EC d_ec; -// CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer, +// DEVICE: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer, +// HOST: @d_ec = internal global %struct.EC undef, __shared__ EC s_ec; -// CHECK: @s_ec = addrspace(3) global %struct.EC undef, +// DEVICE: @s_ec = addrspace(3) global %struct.EC undef, +// HOST: @s_ec = internal global %struct.EC undef, __constant__ EC c_ec; -// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, +// DEVICE: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, +// HOST: @c_ec = internal global %struct.EC undef // empty destructor __device__ ED d_ed; -// CHECK: @d_ed = addrspace(1) externally_initialized global %struct.ED zeroinitializer, +// DEVICE: @d_ed = addrspace(1) externally_initialized global %struct.ED zeroinitializer, +// HOST: @d_ed = internal global %struct.ED undef, __shared__ ED s_ed; -// CHECK: @s_ed = addrspace(3) global %struct.ED undef, +// DEVICE: @s_ed = addrspace(3) global %struct.ED undef, +// HOST: @s_ed = internal global %struct.ED undef, __constant__ ED c_ed; -// CHECK: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer, +// DEVICE: @c_ed = addrspace(4) externally_initialized global %struct.ED zeroinitializer, +// HOST: @c_ed = internal global %struct.ED undef, __device__ ECD d_ecd; -// CHECK: @d_ecd = addrspace(1) externally_initialized global %struct.ECD zeroinitializer, +// DEVICE: @d_ecd = addrspace(1) externally_initialized global %struct.ECD zeroinitializer, +// HOST: @d_ecd = internal global %struct.ECD undef, __shared__ ECD s_ecd; -// CHECK: @s_ecd = addrspace(3) global %struct.ECD undef, +// DEVICE: @s_ecd = addrspace(3) global %struct.ECD undef, +// HOST: @s_ecd = internal global %struct.ECD undef, __constant__ ECD c_ecd; -// CHECK: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer, +// DEVICE: @c_ecd = addrspace(4) externally_initialized global %struct.ECD zeroinitializer, +// HOST: @c_ecd = internal global %struct.ECD undef, // empty templated constructor -- allowed with no arguments __device__ ETC d_etc; -// CHECK: @d_etc = addrspace(1) externally_initialized global %struct.ETC zeroinitializer, +// DEVICE: @d_etc = addrspace(1) externally_initialized global %struct.ETC zeroinitializer, +// HOST: @d_etc = internal global %struct.ETC undef, __shared__ ETC s_etc; -// CHECK: @s_etc = addrspace(3) global %struct.ETC undef, +// DEVICE: @s_etc = addrspace(3) global %struct.ETC undef, +// HOST: @s_etc = internal global %struct.ETC undef, __constant__ ETC c_etc; -// CHECK: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer, +// DEVICE: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer, +// HOST: @c_etc = internal global %struct.ETC undef, __device__ NCFS d_ncfs; -// CHECK: @d_ncfs = addrspace(1) externally_initialized global %struct.NCFS { i32 3 } +// DEVICE: @d_ncfs = addrspace(1) externally_initialized global %struct.NCFS { i32 3 } +// HOST: @d_ncfs = internal global %struct.NCFS undef, __constant__ NCFS c_ncfs; -// CHECK: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 } +// DEVICE: @c_ncfs = addrspace(4) externally_initialized global %struct.NCFS { i32 3 } +// HOST: @c_ncfs = internal global %struct.NCFS undef, // Regular base class -- allowed __device__ T_B_T d_t_b_t; -// CHECK: @d_t_b_t = addrspace(1) externally_initialized global %struct.T_B_T zeroinitializer, +// DEVICE: @d_t_b_t = addrspace(1) externally_initialized global %struct.T_B_T zeroinitializer, +// HOST: @d_t_b_t = internal global %struct.T_B_T undef, __shared__ T_B_T s_t_b_t; -// CHECK: @s_t_b_t = addrspace(3) global %struct.T_B_T undef, +// DEVICE: @s_t_b_t = addrspace(3) global %struct.T_B_T undef, +// HOST: @s_t_b_t = internal global %struct.T_B_T undef, __constant__ T_B_T c_t_b_t; -// CHECK: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer, +// DEVICE: @c_t_b_t = addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer, +// HOST: @c_t_b_t = internal global %struct.T_B_T undef, // Incapsulated object of allowed class -- allowed __device__ T_F_T d_t_f_t; -// CHECK: @d_t_f_t = addrspace(1) externally_initialized global %struct.T_F_T zeroinitializer, +// DEVICE: @d_t_f_t = addrspace(1) externally_initialized global %struct.T_F_T zeroinitializer, +// HOST: @d_t_f_t = internal global %struct.T_F_T undef, __shared__ T_F_T s_t_f_t; -// CHECK: @s_t_f_t = addrspace(3) global %struct.T_F_T undef, +// DEVICE: @s_t_f_t = addrspace(3) global %struct.T_F_T undef, +// HOST: @s_t_f_t = internal global %struct.T_F_T undef, __constant__ T_F_T c_t_f_t; -// CHECK: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer, +// DEVICE: @c_t_f_t = addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer, +// HOST: @c_t_f_t = internal global %struct.T_F_T undef, // array of allowed objects -- allowed __device__ T_FA_T d_t_fa_t; -// CHECK: @d_t_fa_t = addrspace(1) externally_initialized global %struct.T_FA_T zeroinitializer, +// DEVICE: @d_t_fa_t = addrspace(1) externally_initialized global %struct.T_FA_T zeroinitializer, +// HOST: @d_t_fa_t = internal global %struct.T_FA_T undef, __shared__ T_FA_T s_t_fa_t; -// CHECK: @s_t_fa_t = addrspace(3) global %struct.T_FA_T undef, +// DEVICE: @s_t_fa_t = addrspace(3) global %struct.T_FA_T undef, +// HOST: @s_t_fa_t = internal global %struct.T_FA_T undef, __constant__ T_FA_T c_t_fa_t; -// CHECK: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer, +// DEVICE: @c_t_fa_t = addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer, +// HOST: @c_t_fa_t = internal global %struct.T_FA_T undef, // Calling empty base class initializer is OK __device__ EC_I_EC d_ec_i_ec; -// CHECK: @d_ec_i_ec = addrspace(1) externally_initialized global %struct.EC_I_EC zeroinitializer, +// DEVICE: @d_ec_i_ec = addrspace(1) externally_initialized global %struct.EC_I_EC zeroinitializer, +// HOST: @d_ec_i_ec = internal global %struct.EC_I_EC undef, __shared__ EC_I_EC s_ec_i_ec; -// CHECK: @s_ec_i_ec = addrspace(3) global %struct.EC_I_EC undef, +// DEVICE: @s_ec_i_ec = addrspace(3) global %struct.EC_I_EC undef, +// HOST: @s_ec_i_ec = internal global %struct.EC_I_EC undef, __constant__ EC_I_EC c_ec_i_ec; -// CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer, +// DEVICE: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer, +// HOST: @c_ec_i_ec = internal global %struct.EC_I_EC undef, -// CHECK: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef -// CHECK: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef +// DEVICE: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef +// DEVICE: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef -// CHECK: @_ZZ2dfvE11const_array = internal addrspace(4) constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5] -// CHECK: @_ZZ2dfvE9const_int = internal addrspace(4) constant i32 123 +// DEVICE: @_ZZ2dfvE11const_array = internal addrspace(4) constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5] +// DEVICE: @_ZZ2dfvE9const_int = internal addrspace(4) constant i32 123 // We should not emit global initializers for device-side variables. -// CHECK-NOT: @__cxx_global_var_init +// DEVICE-NOT: @__cxx_global_var_init // Make sure that initialization restrictions do not apply to local // variables. @@ -171,90 +208,90 @@ __device__ void df() { // AMDGCN: %[[t_fa_ned:.*]] = addrspacecast %struct.T_FA_NED addrspace(5)* %t_fa_ned to %struct.T_FA_NED* T t; - // CHECK-NOT: call + // DEVICE-NOT: call EC ec; - // CHECK: call void @_ZN2ECC1Ev(%struct.EC* %[[ec]]) + // DEVICE: call void @_ZN2ECC1Ev(%struct.EC* %[[ec]]) ED ed; - // CHECK-NOT: call + // DEVICE-NOT: call ECD ecd; - // CHECK: call void @_ZN3ECDC1Ev(%struct.ECD* %[[ecd]]) + // DEVICE: call void @_ZN3ECDC1Ev(%struct.ECD* %[[ecd]]) ETC etc; - // CHECK: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %[[etc]]) + // DEVICE: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %[[etc]]) UC uc; // undefined constructor -- not allowed - // CHECK: call void @_ZN2UCC1Ev(%struct.UC* %[[uc]]) + // DEVICE: call void @_ZN2UCC1Ev(%struct.UC* %[[uc]]) UD ud; // undefined destructor -- not allowed - // CHECK-NOT: call + // DEVICE-NOT: call ECI eci; // empty constructor w/ initializer list -- not allowed - // CHECK: call void @_ZN3ECIC1Ev(%struct.ECI* %[[eci]]) + // DEVICE: call void @_ZN3ECIC1Ev(%struct.ECI* %[[eci]]) NEC nec; // non-empty constructor -- not allowed - // CHECK: call void @_ZN3NECC1Ev(%struct.NEC* %[[nec]]) + // DEVICE: call void @_ZN3NECC1Ev(%struct.NEC* %[[nec]]) // non-empty destructor -- not allowed NED ned; // no-constructor, virtual method -- not allowed - // CHECK: call void @_ZN3NCVC1Ev(%struct.NCV* %[[ncv]]) + // DEVICE: call void @_ZN3NCVC1Ev(%struct.NCV* %[[ncv]]) NCV ncv; - // CHECK-NOT: call + // DEVICE-NOT: call VD vd; - // CHECK: call void @_ZN2VDC1Ev(%struct.VD* %[[vd]]) + // DEVICE: call void @_ZN2VDC1Ev(%struct.VD* %[[vd]]) NCF ncf; - // CHECK: call void @_ZN3NCFC1Ev(%struct.NCF* %[[ncf]]) + // DEVICE: call void @_ZN3NCFC1Ev(%struct.NCF* %[[ncf]]) NCFS ncfs; - // CHECK: call void @_ZN4NCFSC1Ev(%struct.NCFS* %[[ncfs]]) + // DEVICE: call void @_ZN4NCFSC1Ev(%struct.NCFS* %[[ncfs]]) UTC utc; - // CHECK: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %[[utc]]) + // DEVICE: call void @_ZN3UTCC1IJEEEDpT_(%struct.UTC* %[[utc]]) NETC netc; - // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %[[netc]]) + // DEVICE: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %[[netc]]) T_B_T t_b_t; - // CHECK-NOT: call + // DEVICE-NOT: call T_F_T t_f_t; - // CHECK-NOT: call + // DEVICE-NOT: call T_FA_T t_fa_t; - // CHECK-NOT: call + // DEVICE-NOT: call EC_I_EC ec_i_ec; - // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %[[ec_i_ec]]) + // DEVICE: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %[[ec_i_ec]]) EC_I_EC1 ec_i_ec1; - // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %[[ec_i_ec1]]) + // DEVICE: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %[[ec_i_ec1]]) T_V_T t_v_t; - // CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %[[t_v_t]]) + // DEVICE: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %[[t_v_t]]) T_B_NEC t_b_nec; - // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %[[t_b_nec]]) + // DEVICE: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %[[t_b_nec]]) T_F_NEC t_f_nec; - // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %[[t_f_nec]]) + // DEVICE: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %[[t_f_nec]]) T_FA_NEC t_fa_nec; - // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %[[t_fa_nec]]) + // DEVICE: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %[[t_fa_nec]]) T_B_NED t_b_ned; - // CHECK-NOT: call + // DEVICE-NOT: call T_F_NED t_f_ned; - // CHECK-NOT: call + // DEVICE-NOT: call T_FA_NED t_fa_ned; - // CHECK-NOT: call + // DEVICE-NOT: call static __shared__ EC s_ec; - // CHECK-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*)) + // DEVICE-NOT: call void @_ZN2ECC1Ev(%struct.EC* addrspacecast (%struct.EC addrspace(3)* @_ZZ2dfvE4s_ec to %struct.EC*)) static __shared__ ETC s_etc; - // CHECK-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*)) + // DEVICE-NOT: call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* addrspacecast (%struct.ETC addrspace(3)* @_ZZ2dfvE5s_etc to %struct.ETC*)) static const int const_array[] = {1, 2, 3, 4, 5}; static const int const_int = 123; // anchor point separating constructors and destructors - df(); // CHECK: call void @_Z2dfv() + df(); // DEVICE: call void @_Z2dfv() // Verify that we only call non-empty destructors - // CHECK-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %[[t_fa_ned]]) - // CHECK-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %[[t_f_ned]]) - // CHECK-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %[[t_b_ned]]) - // CHECK-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %[[vd]]) - // CHECK-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %[[ned]]) - // CHECK-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %[[ud]]) - // CHECK-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %[[ecd]]) - // CHECK-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %[[ed]]) + // DEVICE-NEXT: call void @_ZN8T_FA_NEDD1Ev(%struct.T_FA_NED* %[[t_fa_ned]]) + // DEVICE-NEXT: call void @_ZN7T_F_NEDD1Ev(%struct.T_F_NED* %[[t_f_ned]]) + // DEVICE-NEXT: call void @_ZN7T_B_NEDD1Ev(%struct.T_B_NED* %[[t_b_ned]]) + // DEVICE-NEXT: call void @_ZN2VDD1Ev(%struct.VD* %[[vd]]) + // DEVICE-NEXT: call void @_ZN3NEDD1Ev(%struct.NED* %[[ned]]) + // DEVICE-NEXT: call void @_ZN2UDD1Ev(%struct.UD* %[[ud]]) + // DEVICE-NEXT: call void @_ZN3ECDD1Ev(%struct.ECD* %[[ecd]]) + // DEVICE-NEXT: call void @_ZN2EDD1Ev(%struct.ED* %[[ed]]) - // CHECK-NEXT: ret void + // DEVICE-NEXT: ret void } // We should not emit global init function. -// CHECK-NOT: @_GLOBAL__sub_I +// DEVICE-NOT: @_GLOBAL__sub_I diff --git a/test/CodeGenCUDA/link-device-bitcode.cu b/test/CodeGenCUDA/link-device-bitcode.cu index b307838ae82d..69dc051355de 100644 --- a/test/CodeGenCUDA/link-device-bitcode.cu +++ b/test/CodeGenCUDA/link-device-bitcode.cu @@ -11,13 +11,19 @@ // // Make sure function in device-code gets linked in and internalized. // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ +// RUN: -mlink-builtin-bitcode %t.bc -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s \ +// RUN: | FileCheck %s -check-prefix CHECK-IR + +// Make sure legacy flag name works +// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ // RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \ // RUN: -disable-llvm-passes -o - %s \ // RUN: | FileCheck %s -check-prefix CHECK-IR // // Make sure we can link two bitcode files. // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ -// RUN: -mlink-cuda-bitcode %t.bc -mlink-cuda-bitcode %t-2.bc \ +// RUN: -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \ // RUN: -emit-llvm -disable-llvm-passes -o - %s \ // RUN: | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2 // @@ -30,7 +36,7 @@ // // Make sure NVVMReflect pass is enabled in NVPTX back-end. // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ -// RUN: -mlink-cuda-bitcode %t.bc -S -o /dev/null %s \ +// RUN: -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \ // RUN: -mllvm -debug-pass=Structure 2>&1 \ // RUN: | FileCheck %s -check-prefix CHECK-REFLECT diff --git a/test/CodeGenCUDA/propagate-metadata.cu b/test/CodeGenCUDA/propagate-metadata.cu index 1616cb933748..773dd8afba81 100644 --- a/test/CodeGenCUDA/propagate-metadata.cu +++ b/test/CodeGenCUDA/propagate-metadata.cu @@ -1,5 +1,5 @@ // Check that when we link a bitcode module into a file using -// -mlink-cuda-bitcode, we apply the same attributes to the functions in that +// -mlink-builtin-bitcode, we apply the same attributes to the functions in that // bitcode module as we apply to functions we generate. // // In particular, we check that ftz and unsafe-math are propagated into the @@ -14,17 +14,17 @@ // RUN: %clang_cc1 -x c++ -emit-llvm-bc -ftrapping-math -DLIB \ // RUN: %s -o %t.bc -triple nvptx-unknown-unknown -// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc -o - \ +// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \ // RUN: -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \ // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST -// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc \ +// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \ // RUN: -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \ // RUN: -fcuda-is-device -triple nvptx-unknown-unknown \ // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ \ // RUN: --check-prefix=NOFAST -// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc \ +// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \ // RUN: -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \ // RUN: -fcuda-is-device -menable-unsafe-fp-math -triple nvptx-unknown-unknown \ // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FAST diff --git a/test/CodeGenCUDA/usual-deallocators.cu b/test/CodeGenCUDA/usual-deallocators.cu new file mode 100644 index 000000000000..2d97c8c9f7de --- /dev/null +++ b/test/CodeGenCUDA/usual-deallocators.cu @@ -0,0 +1,133 @@ +// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown -fcuda-is-device \ +// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,DEVICE +// RUN: %clang_cc1 %s --std=c++11 -triple nvptx-unknown-unknown \ +// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,HOST +// RUN: %clang_cc1 %s --std=c++17 -triple nvptx-unknown-unknown -fcuda-is-device \ +// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,DEVICE +// RUN: %clang_cc1 %s --std=c++17 -triple nvptx-unknown-unknown \ +// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=COMMON,HOST + +#include "Inputs/cuda.h" +extern "C" __host__ void host_fn(); +extern "C" __device__ void dev_fn(); +extern "C" __host__ __device__ void hd_fn(); + +struct H1D1 { + __host__ void operator delete(void *) { host_fn(); }; + __device__ void operator delete(void *) { dev_fn(); }; +}; + +struct H1D2 { + __host__ void operator delete(void *) { host_fn(); }; + __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); }; +}; + +struct H2D1 { + __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); }; + __device__ void operator delete(void *) { dev_fn(); }; +}; + +struct H2D2 { + __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); }; + __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); }; +}; + +struct H1D1D2 { + __host__ void operator delete(void *) { host_fn(); }; + __device__ void operator delete(void *) { dev_fn(); }; + __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); }; +}; + +struct H1H2D1 { + __host__ void operator delete(void *) { host_fn(); }; + __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); }; + __device__ void operator delete(void *) { dev_fn(); }; +}; + +struct H1H2D2 { + __host__ void operator delete(void *) { host_fn(); }; + __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); }; + __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); }; +}; + +struct H1H2D1D2 { + __host__ void operator delete(void *) { host_fn(); }; + __host__ void operator delete(void *, __SIZE_TYPE__) { host_fn(); }; + __device__ void operator delete(void *) { dev_fn(); }; + __device__ void operator delete(void *, __SIZE_TYPE__) { dev_fn(); }; +}; + + +template <typename T> +__host__ __device__ void test_hd(void *p) { + T *t = (T *)p; + delete t; +} + +// Make sure we call the right variant of usual deallocator. +__host__ __device__ void tests_hd(void *t) { + // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI4H1D1EvPv + // COMMON: call void @_ZN4H1D1dlEPv + test_hd<H1D1>(t); + // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI4H1D2EvPv + // DEVICE: call void @_ZN4H1D2dlEPvj(i8* {{.*}}, i32 1) + // HOST: call void @_ZN4H1D2dlEPv(i8* {{.*}}) + test_hd<H1D2>(t); + // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI4H2D1EvPv + // DEVICE: call void @_ZN4H2D1dlEPv(i8* {{.*}}) + // HOST: call void @_ZN4H2D1dlEPvj(i8* %3, i32 1) + test_hd<H2D1>(t); + // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI4H2D2EvPv + // COMMON: call void @_ZN4H2D2dlEPvj(i8* {{.*}}, i32 1) + test_hd<H2D2>(t); + // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI6H1D1D2EvPv + // COMMON: call void @_ZN6H1D1D2dlEPv(i8* %3) + test_hd<H1D1D2>(t); + // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI6H1H2D1EvPv + // COMMON: call void @_ZN6H1H2D1dlEPv(i8* {{.*}}) + test_hd<H1H2D1>(t); + // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI6H1H2D2EvPv + // DEVICE: call void @_ZN6H1H2D2dlEPvj(i8* {{.*}}, i32 1) + // HOST: call void @_ZN6H1H2D2dlEPv(i8* {{.*}}) + test_hd<H1H2D2>(t); + // COMMON-LABEL: define linkonce_odr void @_Z7test_hdI8H1H2D1D2EvPv + // COMMON: call void @_ZN8H1H2D1D2dlEPv(i8* {{.*}}) + test_hd<H1H2D1D2>(t); +} + +// Make sure we've picked deallocator for the correct side of compilation. + +// COMMON-LABEL: define linkonce_odr void @_ZN4H1D1dlEPv(i8*) +// DEVICE: call void @dev_fn() +// HOST: call void @host_fn() + +// DEVICE-LABEL: define linkonce_odr void @_ZN4H1D2dlEPvj(i8*, i32) +// DEVICE: call void @dev_fn() +// HOST-LABEL: define linkonce_odr void @_ZN4H1D2dlEPv(i8*) +// HOST: call void @host_fn() + +// DEVICE-LABEL: define linkonce_odr void @_ZN4H2D1dlEPv(i8*) +// DEVICE: call void @dev_fn() +// HOST-LABEL: define linkonce_odr void @_ZN4H2D1dlEPvj(i8*, i32) +// HOST: call void @host_fn() + +// COMMON-LABEL: define linkonce_odr void @_ZN4H2D2dlEPvj(i8*, i32) +// DEVICE: call void @dev_fn() +// HOST: call void @host_fn() + +// COMMON-LABEL: define linkonce_odr void @_ZN6H1D1D2dlEPv(i8*) +// DEVICE: call void @dev_fn() +// HOST: call void @host_fn() + +// COMMON-LABEL: define linkonce_odr void @_ZN6H1H2D1dlEPv(i8*) +// DEVICE: call void @dev_fn() +// HOST: call void @host_fn() + +// DEVICE-LABEL: define linkonce_odr void @_ZN6H1H2D2dlEPvj(i8*, i32) +// DEVICE: call void @dev_fn() +// HOST-LABEL: define linkonce_odr void @_ZN6H1H2D2dlEPv(i8*) +// HOST: call void @host_fn() + +// COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(i8*) +// DEVICE: call void @dev_fn() +// HOST: call void @host_fn() |
