diff options
Diffstat (limited to 'test/CodeGenOpenCL')
22 files changed, 677 insertions, 228 deletions
diff --git a/test/CodeGenOpenCL/addr-space-struct-arg.cl b/test/CodeGenOpenCL/addr-space-struct-arg.cl index f8d7073f92ee..6f923b7fd403 100644 --- a/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -1,6 +1,9 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s -// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s + +typedef int int2 __attribute__((ext_vector_type(2))); typedef struct { int cells[9]; @@ -130,6 +133,12 @@ kernel void KernelOneMember(struct StructOneMember u) { FuncOneMember(u); } +// SPIR: call void @llvm.memcpy.p0i8.p1i8.i32 +// SPIR-NOT: addrspacecast +kernel void KernelOneMemberSpir(global struct StructOneMember* u) { + FuncOneMember(*u); +} + // AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember( // AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5) // AMDGCN: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8 diff --git a/test/CodeGenOpenCL/address-spaces.cl b/test/CodeGenOpenCL/address-spaces.cl index 60f5e30d328b..3c8fea2a80bc 100644 --- a/test/CodeGenOpenCL/address-spaces.cl +++ b/test/CodeGenOpenCL/address-spaces.cl @@ -19,7 +19,7 @@ struct S { // CL20-DAG: @g_static_var = internal addrspace(1) global float 0.000000e+00 #ifdef CL20 -// CL20-DAG: @g_s = common {{(dso_local )?}}addrspace(1) global %struct.S zeroinitializer +// CL20-DAG: @g_s = {{(common )?}}{{(dso_local )?}}addrspace(1) global %struct.S zeroinitializer struct S g_s; #endif @@ -55,7 +55,7 @@ void fc(constant int *arg) {} int i; // CL20-DAG: @i = common {{(dso_local )?}}addrspace(1) global i32 0 int *ptr; -// CL20SPIR-DAG: @ptr = common {{(dso_local )?}}addrspace(1) global i32 addrspace(4)* null +// CL20SPIR-DAG: @ptr = {{(common )?}}{{(dso_local )?}}addrspace(1) global i32 addrspace(4)* null // CL20AMDGCN-DAG: @ptr = common {{(dso_local )?}}addrspace(1) global i32* null #endif diff --git a/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl index 75fceccb2a45..875c35c048dd 100644 --- a/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl +++ b/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s --check-prefix=CHECK typedef struct {int a;} ndrange_t; @@ -36,23 +36,23 @@ kernel void test(global char *a, char b, global long *c, long d) { enqueue_kernel(default_queue, flags, ndrange, block); } -// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }>) +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i8 }>) // CHECK-SAME: #[[ATTR:[0-9]+]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} // CHECK: entry: -// CHECK: %1 = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5) -// CHECK: store <{ i32, i32, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8 -// CHECK: %2 = addrspacecast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8* +// CHECK: %1 = alloca <{ i32, i32, i8*, i8 addrspace(1)*, i8 }>, align 8, addrspace(5) +// CHECK: store <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8 +// CHECK: %2 = addrspacecast <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8* // CHECK: call void @__test_block_invoke(i8* %2) // CHECK: ret void // CHECK:} -// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>) +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>) // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} -// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*) +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*) // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} -// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i64, i64 addrspace(1)* }>) +// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i8*, i64, i64 addrspace(1)* }>) // CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}} // CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" } diff --git a/test/CodeGenOpenCL/amdgpu-features.cl b/test/CodeGenOpenCL/amdgpu-features.cl index a1815cedcd49..7aac4d3a3685 100644 --- a/test/CodeGenOpenCL/amdgpu-features.cl +++ b/test/CodeGenOpenCL/amdgpu-features.cl @@ -5,8 +5,16 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx801 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX801 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx700 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX700 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s -// GFX904: "target-features"="+16-bit-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime" -// GFX906: "target-features"="+16-bit-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime" +// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" +// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" +// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime,+vi-insts" +// GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals" +// GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals" +// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals" kernel void test() {} diff --git a/test/CodeGenOpenCL/amdgpu-nullptr.cl b/test/CodeGenOpenCL/amdgpu-nullptr.cl index 688d3a58e90b..c7c77920b775 100644 --- a/test/CodeGenOpenCL/amdgpu-nullptr.cl +++ b/test/CodeGenOpenCL/amdgpu-nullptr.cl @@ -143,7 +143,7 @@ void test_static_var_local(void) { // NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp3, align 4 // NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp4, align 4 // NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1 addrspace(5)* %SS1 to i8 addrspace(5)* -// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @test_func_scope_var_private.SS1 to i8 addrspace(4)*), i64 32, i1 false) +// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @__const.test_func_scope_var_private.SS1 to i8 addrspace(4)*), i64 32, i1 false) // NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2 addrspace(5)* %SS2 to i8 addrspace(5)* // NOOPT: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 %[[SS2]], i8 0, i64 24, i1 false) void test_func_scope_var_private(void) { @@ -163,7 +163,7 @@ void test_func_scope_var_private(void) { // NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(5)* %sp3, align 4 // NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(5)* %sp4, align 4 // NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1 addrspace(5)* %SS1 to i8 addrspace(5)* -// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @test_func_scope_var_local.SS1 to i8 addrspace(4)*), i64 32, i1 false) +// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @__const.test_func_scope_var_local.SS1 to i8 addrspace(4)*), i64 32, i1 false) // NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2 addrspace(5)* %SS2 to i8 addrspace(5)* // NOOPT: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 %[[SS2]], i8 0, i64 24, i1 false) void test_func_scope_var_local(void) { diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl index 80ac5727b094..675240c6f05d 100644 --- a/test/CodeGenOpenCL/blocks.cl +++ b/test/CodeGenOpenCL/blocks.cl @@ -1,7 +1,12 @@ -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple spir-unknown-unknown | FileCheck -check-prefixes=COMMON,SPIR %s -// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,AMDGCN %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck -check-prefixes=COMMON,SPIR %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,AMDGCN %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple spir-unknown-unknown | FileCheck -check-prefixes=CHECK-DEBUG %s +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=CHECK-DEBUG %s -// COMMON: @__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 } +// SPIR: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } +// AMDGCN: %struct.__opencl_block_literal_generic = type { i32, i32, i8* } +// SPIR: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) to i8 addrspace(4)*) } +// AMDGCN: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8* } { i32 16, i32 8, i8* bitcast (void (i8*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) } // COMMON-NOT: .str // SPIR-LABEL: define internal {{.*}}void @block_A_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(3)* %a) @@ -17,32 +22,44 @@ void foo(){ // COMMON-NOT: %block.flags // COMMON-NOT: %block.reserved // COMMON-NOT: %block.descriptor - // SPIR: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block:.*]], i32 0, i32 0 - // AMDGCN: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 0 - // SPIR: store i32 12, i32* %[[block_size]] - // AMDGCN: store i32 12, i32 addrspace(5)* %[[block_size]] - // SPIR: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 1 - // AMDGCN: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 1 + // SPIR: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 0 + // AMDGCN: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %block, i32 0, i32 0 + // SPIR: store i32 16, i32* %[[block_size]] + // AMDGCN: store i32 20, i32 addrspace(5)* %[[block_size]] + // SPIR: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 1 + // AMDGCN: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %block, i32 0, i32 1 // SPIR: store i32 4, i32* %[[block_align]] - // AMDGCN: store i32 4, i32 addrspace(5)* %[[block_align]] - // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2 + // AMDGCN: store i32 8, i32 addrspace(5)* %[[block_align]] + // SPIR: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block:.*]], i32 0, i32 2 + // SPIR: store i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %[[block_invoke]] + // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3 // SPIR: %[[i_value:.*]] = load i32, i32* %i // SPIR: store i32 %[[i_value]], i32* %[[block_captured]], - // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }>* %[[block]] to i32 ()* + // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()* // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)* // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]], - // SPIR: %[[block_literal:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]] - // SPIR: %[[blk_gen_ptr:.*]] = bitcast i32 () addrspace(4)* %[[block_literal]] to i8 addrspace(4)* - // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]]) - // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 2 + // SPIR: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]] + // SPIR: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)* + // SPIR: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2 + // SPIR: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)* + // SPIR: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]] + // SPIR: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)* + // SPIR: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]]) + // AMDGCN: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 2 + // AMDGCN: store i8* bitcast (i32 (i8*)* @__foo_block_invoke to i8*), i8* addrspace(5)* %[[block_invoke]] + // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]], i32 0, i32 3 // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]], - // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)* + // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8*, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)* // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()* // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]], - // AMDGCN: %[[block_literal:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]] - // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast i32 ()* %[[block_literal]] to i8* - // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]]) + // AMDGCN: %[[blk_gen_ptr:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]] + // AMDGCN: %[[block_literal:.*]] = bitcast i32 ()* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic* + // AMDGCN: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic* %[[block_literal]], i32 0, i32 2 + // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic* %[[block_literal]] to i8* + // AMDGCN: %[[invoke_func_ptr:.*]] = load i8*, i8** %[[invoke_addr]] + // AMDGCN: %[[invoke_func:.*]] = bitcast i8* %[[invoke_func_ptr]] to i32 (i8*)* + // AMDGCN: call {{.*}}i32 %[[invoke_func]](i8* %[[blk_gen_ptr]]) int (^ block_B)(void) = ^{ return i; @@ -51,20 +68,40 @@ void foo(){ } // SPIR-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) -// SPIR: %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 }> addrspace(4)* -// SPIR: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(4)* %[[block]], i32 0, i32 2 +// SPIR: %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)* +// SPIR: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)* %[[block]], i32 0, i32 3 // SPIR: %[[block_capture:.*]] = load i32, i32 addrspace(4)* %[[block_capture_addr]] // AMDGCN-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8* %.block_descriptor) -// AMDGCN: %[[block:.*]] = bitcast i8* %.block_descriptor to <{ i32, i32, i32 }>* -// AMDGCN: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2 +// AMDGCN: %[[block:.*]] = bitcast i8* %.block_descriptor to <{ i32, i32, i8*, i32 }>* +// AMDGCN: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8*, i32 }>, <{ i32, i32, i8*, i32 }>* %[[block]], i32 0, i32 3 // AMDGCN: %[[block_capture:.*]] = load i32, i32* %[[block_capture_addr]] // COMMON-NOT: define{{.*}}@__foo_block_invoke_kernel -// COMMON: !DIDerivedType(tag: DW_TAG_member, name: "__size" -// COMMON: !DIDerivedType(tag: DW_TAG_member, name: "__align" +// Test that we support block arguments. +// COMMON-LABEL: define {{.*}} @blockArgFunc +int blockArgFunc(int (^ bl)(void)) { + return bl(); +} + +// COMMON-LABEL: define {{.*}} @get21 +// COMMON: define {{.*}} @__get21_block_invoke +// COMMON: ret i32 21 +int get21() { + return blockArgFunc(^{return 21;}); +} + +// COMMON-LABEL: define {{.*}} @get42 +// COMMON: define {{.*}} @__get42_block_invoke +// COMMON: ret i32 42 +int get42() { + return blockArgFunc(^{return 42;}); +} + +// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__size" +// CHECK-DEBUG: !DIDerivedType(tag: DW_TAG_member, name: "__align" -// COMMON-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__isa" -// COMMON-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__flags" -// COMMON-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__reserved" -// COMMON-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__FuncPtr" +// CHECK-DEBUG-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__isa" +// CHECK-DEBUG-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__flags" +// CHECK-DEBUG-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__reserved" +// CHECK-DEBUG-NOT: !DIDerivedType(tag: DW_TAG_member, name: "__FuncPtr" diff --git a/test/CodeGenOpenCL/builtins-amdgcn-ci.cl b/test/CodeGenOpenCL/builtins-amdgcn-ci.cl new file mode 100644 index 000000000000..41275268dbb4 --- /dev/null +++ b/test/CodeGenOpenCL/builtins-amdgcn-ci.cl @@ -0,0 +1,19 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu hawaii -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu fiji -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: @test_s_dcache_inv_vol +// CHECK: call void @llvm.amdgcn.s.dcache.inv.vol( +void test_s_dcache_inv_vol() +{ + __builtin_amdgcn_s_dcache_inv_vol(); +} + +// CHECK-LABEL: @test_buffer_wbinvl1_vol +// CHECK: call void @llvm.amdgcn.buffer.wbinvl1.vol() +void test_buffer_wbinvl1_vol() +{ + __builtin_amdgcn_buffer_wbinvl1_vol(); +} + diff --git a/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl index ca3f4006e300..e2c03a471baa 100644 --- a/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl +++ b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl @@ -12,24 +12,24 @@ kernel void builtins_amdgcn_dl_insts_err( half2 v2hA, half2 v2hB, float fC, short2 v2ssA, short2 v2ssB, int siA, int siB, int siC, ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) { - fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dl-insts}} - fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dl-insts}} + fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}} + fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}} - siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dl-insts}} - siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dl-insts}} + siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}} + siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}} - uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dl-insts}} - uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dl-insts}} + uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}} + uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}} - siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dl-insts}} - siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dl-insts}} + siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}} + siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}} - uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dl-insts}} - uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dl-insts}} + uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}} + uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}} - siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dl-insts}} - siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dl-insts}} + siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}} + siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}} - uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dl-insts}} - uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dl-insts}} + uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}} + uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}} } diff --git a/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/test/CodeGenOpenCL/builtins-amdgcn-vi.cl index afa312cfcb12..220142122435 100644 --- a/test/CodeGenOpenCL/builtins-amdgcn-vi.cl +++ b/test/CodeGenOpenCL/builtins-amdgcn-vi.cl @@ -82,13 +82,27 @@ void test_s_memrealtime(global ulong* out) *out = __builtin_amdgcn_s_memrealtime(); } +// CHECK-LABEL: @test_s_dcache_wb() +// CHECK: call void @llvm.amdgcn.s.dcache.wb() +void test_s_dcache_wb() +{ + __builtin_amdgcn_s_dcache_wb(); +} + // CHECK-LABEL: @test_mov_dpp -// CHECK: call i32 @llvm.amdgcn.mov.dpp.i32(i32 %src, i32 0, i32 0, i32 0, i1 false) +// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %src, i32 0, i32 0, i32 0, i1 false) void test_mov_dpp(global int* out, int src) { *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false); } +// CHECK-LABEL: @test_update_dpp +// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false) +void test_update_dpp(global int* out, int arg1, int arg2) +{ + *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false); +} + // CHECK-LABEL: @test_ds_fadd // CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false) void test_ds_faddf(local float *out, float src) { diff --git a/test/CodeGenOpenCL/builtins-amdgcn.cl b/test/CodeGenOpenCL/builtins-amdgcn.cl index 2015f36e93dc..dc7f480209af 100644 --- a/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1,6 +1,5 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-opencl -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s #pragma OPENCL EXTENSION cl_khr_fp64 : enable @@ -20,19 +19,42 @@ void test_div_scale_f64(global double* out, global int* flagout, double a, doubl *flagout = flag; } -// CHECK-LABEL: @test_div_scale_f32 +// CHECK-LABEL: @test_div_scale_f32( // CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) // CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1 // CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0 -// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 -// CHECK: store i32 [[FLAGEXT]] -void test_div_scale_f32(global float* out, global int* flagout, float a, float b) +// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i8 +// CHECK: store i8 [[FLAGEXT]] +void test_div_scale_f32(global float* out, global bool* flagout, float a, float b) { bool flag; *out = __builtin_amdgcn_div_scalef(a, b, true, &flag); *flagout = flag; } +// CHECK-LABEL: @test_div_scale_f32_global_ptr( +// CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) +// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1 +// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0 +// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i8 +// CHECK: store i8 [[FLAGEXT]] +void test_div_scale_f32_global_ptr(global float* out, global int* flagout, float a, float b, global bool* flag) +{ + *out = __builtin_amdgcn_div_scalef(a, b, true, flag); +} + +// CHECK-LABEL: @test_div_scale_f32_generic_ptr( +// CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) +// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1 +// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0 +// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i8 +// CHECK: store i8 [[FLAGEXT]] +void test_div_scale_f32_generic_ptr(global float* out, global int* flagout, float a, float b, global bool* flag_arg) +{ + generic bool* flag = flag_arg; + *out = __builtin_amdgcn_div_scalef(a, b, true, flag); +} + // CHECK-LABEL: @test_div_fmas_f32 // CHECK: call float @llvm.amdgcn.div.fmas.f32 void test_div_fmas_f32(global float* out, float a, float b, float c, int d) @@ -414,42 +436,49 @@ void test_cubema(global float* out, float a, float b, float c) { } // CHECK-LABEL: @test_read_exec( -// CHECK: call i64 @llvm.read_register.i64(metadata ![[EXEC:[0-9]+]]) #[[READ_EXEC_ATTRS:[0-9]+]] +// CHECK: call i64 @llvm.read_register.i64(metadata ![[$EXEC:[0-9]+]]) #[[$READ_EXEC_ATTRS:[0-9]+]] void test_read_exec(global ulong* out) { *out = __builtin_amdgcn_read_exec(); } -// CHECK: declare i64 @llvm.read_register.i64(metadata) #[[NOUNWIND_READONLY:[0-9]+]] +// CHECK: declare i64 @llvm.read_register.i64(metadata) #[[$NOUNWIND_READONLY:[0-9]+]] // CHECK-LABEL: @test_read_exec_lo( -// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_LO:[0-9]+]]) #[[READ_EXEC_ATTRS]] +// CHECK: call i32 @llvm.read_register.i32(metadata ![[$EXEC_LO:[0-9]+]]) #[[$READ_EXEC_ATTRS]] void test_read_exec_lo(global uint* out) { *out = __builtin_amdgcn_read_exec_lo(); } // CHECK-LABEL: @test_read_exec_hi( -// CHECK: call i32 @llvm.read_register.i32(metadata ![[EXEC_HI:[0-9]+]]) #[[READ_EXEC_ATTRS]] +// CHECK: call i32 @llvm.read_register.i32(metadata ![[$EXEC_HI:[0-9]+]]) #[[$READ_EXEC_ATTRS]] void test_read_exec_hi(global uint* out) { *out = __builtin_amdgcn_read_exec_hi(); } // CHECK-LABEL: @test_dispatch_ptr // CHECK: call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -void test_dispatch_ptr(__attribute__((address_space(4))) unsigned char ** out) +void test_dispatch_ptr(__constant unsigned char ** out) { *out = __builtin_amdgcn_dispatch_ptr(); } +// CHECK-LABEL: @test_queue_ptr +// CHECK: call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() +void test_queue_ptr(__constant unsigned char ** out) +{ + *out = __builtin_amdgcn_queue_ptr(); +} + // CHECK-LABEL: @test_kernarg_segment_ptr // CHECK: call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() -void test_kernarg_segment_ptr(__attribute__((address_space(4))) unsigned char ** out) +void test_kernarg_segment_ptr(__constant unsigned char ** out) { *out = __builtin_amdgcn_kernarg_segment_ptr(); } // CHECK-LABEL: @test_implicitarg_ptr // CHECK: call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() -void test_implicitarg_ptr(__attribute__((address_space(4))) unsigned char ** out) +void test_implicitarg_ptr(__constant unsigned char ** out) { *out = __builtin_amdgcn_implicitarg_ptr(); } @@ -480,9 +509,9 @@ void test_s_getreg(volatile global uint *out) } // CHECK-LABEL: @test_get_local_id( -// CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[WI_RANGE:![0-9]*]] -// CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[WI_RANGE]] -// CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[WI_RANGE]] +// CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[$WI_RANGE:![0-9]*]] +// CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[$WI_RANGE]] +// CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[$WI_RANGE]] void test_get_local_id(int d, global int *out) { switch (d) { @@ -507,9 +536,9 @@ void test_s_getpc(global ulong* out) *out = __builtin_amdgcn_s_getpc(); } -// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024} -// CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } -// CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent } -// CHECK-DAG: ![[EXEC]] = !{!"exec"} -// CHECK-DAG: ![[EXEC_LO]] = !{!"exec_lo"} -// CHECK-DAG: ![[EXEC_HI]] = !{!"exec_hi"} +// CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} +// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } +// CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } +// CHECK-DAG: ![[$EXEC]] = !{!"exec"} +// CHECK-DAG: ![[$EXEC_LO]] = !{!"exec_lo"} +// CHECK-DAG: ![[$EXEC_HI]] = !{!"exec_hi"} diff --git a/test/CodeGenOpenCL/builtins.cl b/test/CodeGenOpenCL/builtins.cl new file mode 100644 index 000000000000..3fba83dcf5dc --- /dev/null +++ b/test/CodeGenOpenCL/builtins.cl @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 %s -finclude-default-header -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s + +void testBranchingOnEnqueueKernel(queue_t default_queue, unsigned flags, ndrange_t ndrange) { + // Ensure `enqueue_kernel` can be branched upon. + + if (enqueue_kernel(default_queue, flags, ndrange, ^(void) {})) + (void)0; + // CHECK: [[P:%[0-9]+]] = call i32 @__enqueue_kernel + // CHECK-NEXT: [[Q:%[a-z0-9]+]] = icmp ne i32 [[P]], 0 + // CHECK-NEXT: br i1 [[Q]] + + if (get_kernel_work_group_size(^(void) {})) + (void)0; + // CHECK: [[P:%[0-9]+]] = call i32 @__get_kernel_work_group_size + // CHECK-NEXT: [[Q:%[a-z0-9]+]] = icmp ne i32 [[P]], 0 + // CHECK-NEXT: br i1 [[Q]] + + if (get_kernel_preferred_work_group_size_multiple(^(void) {})) + (void)0; + // CHECK: [[P:%[0-9]+]] = call i32 @__get_kernel_preferred_work_group_size_multiple_impl + // CHECK-NEXT: [[Q:%[a-z0-9]+]] = icmp ne i32 [[P]], 0 + // CHECK-NEXT: br i1 [[Q]] +} + +void testBranchinOnPipeOperations(read_only pipe int r, write_only pipe int w, global int* ptr) { + // Verify that return type is correctly casted to i1 value. + + if (read_pipe(r, ptr)) + (void)0; + // CHECK: [[R:%[0-9]+]] = call i32 @__read_pipe_2 + // CHECK-NEXT: icmp ne i32 [[R]], 0 + + if (write_pipe(w, ptr)) + (void)0; + // CHECK: [[R:%[0-9]+]] = call i32 @__write_pipe_2 + // CHECK-NEXT: icmp ne i32 [[R]], 0 + + if (get_pipe_num_packets(r)) + (void)0; + // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_num_packets_ro + // CHECK-NEXT: icmp ne i32 [[R]], 0 + + if (get_pipe_num_packets(w)) + (void)0; + // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_num_packets_wo + // CHECK-NEXT: icmp ne i32 [[R]], 0 + + if (get_pipe_max_packets(r)) + (void)0; + // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_max_packets_ro + // CHECK-NEXT: icmp ne i32 [[R]], 0 + + if (get_pipe_max_packets(w)) + (void)0; + // CHECK: [[R:%[0-9]+]] = call i32 @__get_pipe_max_packets_wo + // CHECK-NEXT: icmp ne i32 [[R]], 0 +} + +void testBranchingOnAddressSpaceCast(generic long* ptr) { + // Verify that pointer types are properly casted, respecting address spaces. + + if (to_global(ptr)) + (void)0; + // CHECK: [[P:%[0-9]+]] = call [[GLOBAL_VOID:i8 addrspace\(1\)\*]] @__to_global([[GENERIC_VOID:i8 addrspace\(4\)\*]] {{%[0-9]+}}) + // CHECK-NEXT: [[Q:%[0-9]+]] = bitcast [[GLOBAL_VOID]] [[P]] to [[GLOBAL_i64:i64 addrspace\(1\)\*]] + // CHECK-NEXT: [[BOOL:%[a-z0-9]+]] = icmp ne [[GLOBAL_i64]] [[Q]], null + // CHECK-NEXT: br i1 [[BOOL]] + + if (to_local(ptr)) + (void)0; + // CHECK: [[P:%[0-9]+]] = call [[LOCAL_VOID:i8 addrspace\(3\)\*]] @__to_local([[GENERIC_VOID]] {{%[0-9]+}}) + // CHECK-NEXT: [[Q:%[0-9]+]] = bitcast [[LOCAL_VOID]] [[P]] to [[LOCAL_i64:i64 addrspace\(3\)\*]] + // CHECK-NEXT: [[BOOL:%[a-z0-9]+]] = icmp ne [[LOCAL_i64]] [[Q]], null + // CHECK-NEXT: br i1 [[BOOL]] + + if (to_private(ptr)) + (void)0; + // CHECK: [[P:%[0-9]+]] = call [[PRIVATE_VOID:i8\*]] @__to_private([[GENERIC_VOID]] {{%[0-9]+}}) + // CHECK-NEXT: [[Q:%[0-9]+]] = bitcast [[PRIVATE_VOID]] [[P]] to [[PRIVATE_i64:i64\*]] + // CHECK-NEXT: [[BOOL:%[a-z0-9]+]] = icmp ne [[PRIVATE_i64]] [[Q]], null + // CHECK-NEXT: br i1 [[BOOL]] +} + diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl index d74a1dfbd484..473219478a2f 100644 --- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl +++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl @@ -1,30 +1,33 @@ // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B32 // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=COMMON --check-prefix=B64 +// RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES #pragma OPENCL EXTENSION cl_khr_subgroups : enable typedef void (^bl_t)(local void *); typedef struct {int a;} ndrange_t; +// COMMON: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself. -// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) +// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) } +// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*) // For anonymous blocks without captures, emit block literals as global variable. -// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } -// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} } +// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVG8:@[^ ]+]] to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INVG9:@[^ ]+]] to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } +// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) } // Emits block literal [[BL_GLOBAL]], invoke function [[INV_G]] and global block variable @block_G -// COMMON: define internal spir_func void [[INV_G:.*]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}}) +// COMMON: define internal spir_func void [[INV_G]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}}) const bl_t block_G = (bl_t) ^ (local void *a) {}; void callee(int id, __global int *out) { @@ -46,12 +49,36 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: %event_wait_list2 = alloca [1 x %opencl.clk_event_t*] clk_event_t event_wait_list2[] = {clk_event}; - // Emits block literal on stack and block kernel [[INVLK1]]. // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4 + + // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32] + // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64] + // CHECK-LIFETIMES: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64] + // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32] + // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64] + // CHECK-LIFETIMES: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64] + // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32] + // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64] + // CHECK-LIFETIMES: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64] + // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32] + // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64] + // CHECK-LIFETIMES: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64] + // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32] + // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64] + // CHECK-LIFETIMES: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64] + // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32] + // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64] + // CHECK-LIFETIMES: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64] + // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32] + // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64] + // CHECK-LIFETIMES: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64] + + // Emits block literal on stack and block kernel [[INVLK1]]. // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* - // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()* + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke + // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()* + // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()* // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* // COMMON-LABEL: call i32 @__enqueue_kernel_basic( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, @@ -67,54 +94,68 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* - // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke + // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()* // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)* // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), // COMMON-SAME: i8 addrspace(4)* [[BL_I8]]) - enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event, ^(void) { a[i] = b[i]; }); + // COMMON-LABEL: call i32 @__enqueue_kernel_basic_events + // COMMON-SAME: (%opencl.queue_t{{.*}}* {{%[0-9]+}}, i32 {{%[0-9]+}}, %struct.ndrange_t* {{.*}}, i32 1, %opencl.clk_event_t{{.*}}* addrspace(4)* {{%[0-9]+}}, %opencl.clk_event_t{{.*}}* addrspace(4)* null, + enqueue_kernel(default_queue, flags, ndrange, 1, &event_wait_list, 0, + ^(void) { + return; + }); + // Emits global block literal [[BLG1]] and block kernel [[INVGK1]]. // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // B32: %[[TMP:.*]] = alloca [1 x i32] - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 - // B32: store i32 256, i32* %[[TMP1]], align 4 - // B64: %[[TMP:.*]] = alloca [1 x i64] - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 - // B64: store i64 256, i64* %[[TMP1]], align 8 + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES1]] to i8* + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES1]], i64 0, i64 0 + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES1]], i32 0, i32 0 + // B32: store i32 256, i32* %[[TMP]], align 4 + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES1]], i32 0, i32 0 + // B64: store i64 256, i64* %[[TMP]], align 8 // COMMON-LABEL: call i32 @__enqueue_kernel_varargs( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, - // B32-SAME: i32* %[[TMP1]]) - // B64-SAME: i64* %[[TMP1]]) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // B32-SAME: i32* %[[TMP]]) + // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; }, 256); + char c; // Emits global block literal [[BLG2]] and block kernel [[INVGK2]]. // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // B32: %[[TMP:.*]] = alloca [1 x i32] - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 - // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 - // B64: %[[TMP:.*]] = alloca [1 x i64] - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 - // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES2]] to i8* + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES2]], i64 0, i64 0 + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES2]], i32 0, i32 0 + // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES2]], i32 0, i32 0 + // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 // COMMON-LABEL: call i32 @__enqueue_kernel_varargs( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, - // B32-SAME: i32* %[[TMP1]]) - // B64-SAME: i64* %[[TMP1]]) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // B32-SAME: i32* %[[TMP]]) + // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; @@ -127,18 +168,21 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* - // B32: %[[TMP:.*]] = alloca [1 x i32] - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 - // B32: store i32 256, i32* %[[TMP1]], align 4 - // B64: %[[TMP:.*]] = alloca [1 x i64] - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 - // B64: store i64 256, i64* %[[TMP1]], align 8 + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES3]] to i8* + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES3]], i64 0, i64 0 + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs( + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES3]], i32 0, i32 0 + // B32: store i32 256, i32* %[[TMP]], align 4 + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES3]], i32 0, i32 0 + // B64: store i64 256, i64* %[[TMP]], align 8 // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, - // B32-SAME: i32* %[[TMP1]]) - // B64-SAME: i64* %[[TMP1]]) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // B32-SAME: i32* %[[TMP]]) + // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, ^(local void *p) { return; @@ -151,18 +195,21 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 // COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** [[AD]] to %opencl.clk_event_t{{.*}}* addrspace(4)* // COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)* - // B32: %[[TMP:.*]] = alloca [1 x i32] - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 - // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 - // B64: %[[TMP:.*]] = alloca [1 x i64] - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 - // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES4]] to i8* + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES4]], i64 0, i64 0 + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_events_varargs( + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES4]], i32 0, i32 0 + // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES4]], i32 0, i32 0 + // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, - // B32-SAME: i32* %[[TMP1]]) - // B64-SAME: i64* %[[TMP1]]) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // B32-SAME: i32* %[[TMP]]) + // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, ^(local void *p) { return; @@ -173,18 +220,21 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // Emits global block literal [[BLG5]] and block kernel [[INVGK5]]. // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // B32: %[[TMP:.*]] = alloca [1 x i32] - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 - // B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4 - // B64: %[[TMP:.*]] = alloca [1 x i64] - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 - // B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8 + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES5]] to i8* + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES5]], i64 0, i64 0 + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES5]], i32 0, i32 0 + // B32: store i32 %{{.*}}, i32* %[[TMP]], align 4 + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES5]], i32 0, i32 0 + // B64: store i64 %{{.*}}, i64* %[[TMP]], align 8 // COMMON-LABEL: call i32 @__enqueue_kernel_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, - // B32-SAME: i32* %[[TMP1]]) - // B64-SAME: i64* %[[TMP1]]) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // B32-SAME: i32* %[[TMP]]) + // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; @@ -194,26 +244,29 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // Emits global block literal [[BLG6]] and block kernel [[INVGK6]]. // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // B32: %[[TMP:.*]] = alloca [3 x i32] - // B32: %[[TMP1:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 0 - // B32: store i32 1, i32* %[[TMP1]], align 4 - // B32: %[[TMP2:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 1 - // B32: store i32 2, i32* %[[TMP2]], align 4 - // B32: %[[TMP3:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 2 - // B32: store i32 4, i32* %[[TMP3]], align 4 - // B64: %[[TMP:.*]] = alloca [3 x i64] - // B64: %[[TMP1:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 0 - // B64: store i64 1, i64* %[[TMP1]], align 8 - // B64: %[[TMP2:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 1 - // B64: store i64 2, i64* %[[TMP2]], align 8 - // B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 2 - // B64: store i64 4, i64* %[[TMP3]], align 8 + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [3 x i64]* %[[BLOCK_SIZES6]] to i8* + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull [[LIFETIME_PTR]]) + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i64 0, i64 0 + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull [[LIFETIME_PTR]]) + // B32: %[[TMP:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 0 + // B32: store i32 1, i32* %[[TMP]], align 4 + // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 1 + // B32: store i32 2, i32* %[[BLOCK_SIZES62]], align 4 + // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], [3 x i32]* %[[BLOCK_SIZES6]], i32 0, i32 2 + // B32: store i32 4, i32* %[[BLOCK_SIZES63]], align 4 + // B64: %[[TMP:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 0 + // B64: store i64 1, i64* %[[TMP]], align 8 + // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 1 + // B64: store i64 2, i64* %[[BLOCK_SIZES62]], align 8 + // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], [3 x i64]* %[[BLOCK_SIZES6]], i32 0, i32 2 + // B64: store i64 4, i64* %[[BLOCK_SIZES63]], align 8 // COMMON-LABEL: call i32 @__enqueue_kernel_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, - // B32-SAME: i32* %[[TMP1]]) - // B64-SAME: i64* %[[TMP1]]) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, + // B32-SAME: i32* %[[TMP]]) + // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p1, local void *p2, local void *p3) { return; @@ -223,18 +276,21 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // Emits global block literal [[BLG7]] and block kernel [[INVGK7]]. // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t*, %opencl.queue_t** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags - // B32: %[[TMP:.*]] = alloca [1 x i32] - // B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0 - // B32: store i32 0, i32* %[[TMP1]], align 4 - // B64: %[[TMP:.*]] = alloca [1 x i64] - // B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0 - // B64: store i64 4294967296, i64* %[[TMP1]], align 8 + // CHECK-LIFETIMES: [[LIFETIME_PTR:%[0-9]+]] = bitcast [1 x i64]* %[[BLOCK_SIZES7]] to i8* + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // CHECK-LIFETIMES-NEXT: getelementptr inbounds [1 x i64], [1 x i64]* %[[BLOCK_SIZES7]], i64 0, i64 0 + // CHECK-LIFETIMES-LABEL: call i32 @__enqueue_kernel_varargs( + // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull [[LIFETIME_PTR]]) + // B32: %[[TMP:.*]] = getelementptr [1 x i32], [1 x i32]* %[[BLOCK_SIZES7]], i32 0, i32 0 + // B32: store i32 0, i32* %[[TMP]], align 4 + // B64: %[[TMP:.*]] = getelementptr [1 x i64], [1 x i64]* %[[BLOCK_SIZES7]], i32 0, i32 0 + // B64: store i64 4294967296, i64* %[[TMP]], align 8 // COMMON-LABEL: call i32 @__enqueue_kernel_varargs // COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, - // B32-SAME: i32* %[[TMP1]]) - // B64-SAME: i64* %[[TMP1]]) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, + // B32-SAME: i32* %[[TMP]]) + // B64-SAME: i64* %[[TMP]]) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p) { return; @@ -244,19 +300,21 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // Emits global block literal [[BLG8]] and invoke function [[INVG8]]. // The full type of these expressions are long (and repeated elsewhere), so we // capture it as part of the regex for convenience and clarity. - // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A + // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A void (^const block_A)(void) = ^{ return; }; // Emits global block literal [[BLG9]] and invoke function [[INVG9]]. - // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B + // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B void (^const block_B)(local void *) = ^(local void *a) { return; }; // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. - // COMMON: call spir_func void [[INVG8:.*]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) + // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* + // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]]. @@ -265,17 +323,19 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON-LABEL: call i32 @__enqueue_kernel_basic( // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) enqueue_kernel(default_queue, flags, ndrange, block_A); // Uses block kernel [[INVGK8]] and global block literal [[BLG8]]. // COMMON: call i32 @__get_kernel_work_group_size_impl( // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) unsigned size = get_kernel_work_group_size(block_A); // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted. - // COMMON: call spir_func void [[INVG8]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2) + // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)* + // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) block_A(); void (^block_C)(void) = ^{ @@ -283,6 +343,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { }; // Emits block literal on stack and block kernel [[INVLK3]]. + // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)* @@ -292,34 +353,34 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON-SAME: i8 addrspace(4)* [[BL_I8]]) enqueue_kernel(default_queue, flags, ndrange, block_C); - // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INVG9]]. + // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]]. // COMMON: call i32 @__get_kernel_work_group_size_impl( // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_work_group_size(block_B); // Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal ind invoke functions are emitted. // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_preferred_work_group_size_multiple(block_A); // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]]. // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl( // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_preferred_work_group_size_multiple(block_G); // Emits global block literal [[BLG10]] and block kernel [[INVGK10]]. // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK10:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){}); // Emits global block literal [[BLG11]] and block kernel [[INVGK11]]. // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}}, // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK11:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*), - // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*)) + // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*)) size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){}); } @@ -336,12 +397,12 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) { // COMMON: define internal spir_kernel void [[INVGK5]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define internal spir_kernel void [[INVGK6]](i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*) #{{[0-9]+}} { // COMMON: entry: -// COMMON: call void @__device_side_enqueue_block_invoke_8(i8 addrspace(4)* %0, i8 addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3) +// COMMON: call void @__device_side_enqueue_block_invoke_9(i8 addrspace(4)* %0, i8 addrspace(3)* %1, i8 addrspace(3)* %2, i8 addrspace(3)* %3) // COMMON: ret void // COMMON: } // COMMON: define internal spir_kernel void [[INVGK7]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) // COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}}) -// COMMON: define internal spir_func void [[INVG9:.*]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) +// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}}) // COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}}) // COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}}) // COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}}) diff --git a/test/CodeGenOpenCL/constant-addr-space-globals.cl b/test/CodeGenOpenCL/constant-addr-space-globals.cl index 7bb970527c26..5fcf117dde33 100644 --- a/test/CodeGenOpenCL/constant-addr-space-globals.cl +++ b/test/CodeGenOpenCL/constant-addr-space-globals.cl @@ -13,8 +13,8 @@ kernel void test(global float *out) { void foo(constant int* p, constant const int *p1, const int *p2, const int *p3); // CHECK: @k.arr1 = internal addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3] -// CHECK: @k.arr2 = private unnamed_addr addrspace(2) constant [3 x i32] [i32 4, i32 5, i32 6] -// CHECK: @k.arr3 = private unnamed_addr addrspace(2) constant [3 x i32] [i32 7, i32 8, i32 9] +// CHECK: @__const.k.arr2 = private unnamed_addr addrspace(2) constant [3 x i32] [i32 4, i32 5, i32 6] +// CHECK: @__const.k.arr3 = private unnamed_addr addrspace(2) constant [3 x i32] [i32 7, i32 8, i32 9] // CHECK: @k.var1 = internal addrspace(2) constant i32 1 kernel void k(void) { // CHECK-NOT: %arr1 = alloca [3 x i32] diff --git a/test/CodeGenOpenCL/denorms-are-zero.cl b/test/CodeGenOpenCL/denorms-are-zero.cl index ab8bf7641c87..d572b1321124 100644 --- a/test/CodeGenOpenCL/denorms-are-zero.cl +++ b/test/CodeGenOpenCL/denorms-are-zero.cl @@ -1,8 +1,26 @@ // RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - %s | FileCheck %s --check-prefix=DENORM-ZERO -// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN -// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN-DENORM -// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck --check-prefix=AMDGCN-FEATURE %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - %s | FileCheck -check-prefix=DENORM-ZERO %s + +// Slow FMAF and slow f32 denormals +// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu pitcairn %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu pitcairn %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s + +// Fast FMAF, but slow f32 denormals +// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu tahiti %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu tahiti %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s + +// Fast F32 denormals, but slow FMAF +// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s + +// Fast F32 denormals and fast FMAF +// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu gfx900 %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-DENORM %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu gfx900 %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s + +// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FEATURE %s +// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu pitcairn %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FEATURE %s + + // For all targets 'denorms-are-zero' attribute is set to 'true' // if '-cl-denorms-are-zero' was specified and to 'false' otherwise. @@ -17,9 +35,11 @@ // explicitly set. amdgcn target always do not flush fp64 denormals. The control for fp64 and fp16 denormals is the same. // AMDGCN-LABEL: define void @f() -// AMDGCN: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}" -// AMDGCN-DENORM-LABEL: define void @f() -// AMDGCN-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}" -// AMDGCN-FEATURE-LABEL: define void @f() + +// AMDGCN-FLUSH: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}" +// AMDGCN-FLUSH-OPT: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}" + +// AMDGCN-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}+fp64-fp16-denormals{{[^"]*}}" + // AMDGCN-FEATURE: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}-fp64-fp16-denormals{{[^"]*}}" void f() {} diff --git a/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl b/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl new file mode 100644 index 000000000000..8b5c5df2e4ba --- /dev/null +++ b/test/CodeGenOpenCL/enqueue-kernel-non-entry-block.cl @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=COMMON,AMDGPU +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR32 +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" < %s | FileCheck %s --check-prefixes=COMMON,SPIR64 +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -debug-info-kind=limited -emit-llvm -o - -triple amdgcn < %s | FileCheck %s --check-prefixes=CHECK-DEBUG + +// Check that the enqueue_kernel array temporary is in the entry block to avoid +// a dynamic alloca + +typedef struct {int a;} ndrange_t; + +kernel void test(int i) { +// COMMON-LABEL: define {{.*}} void @test +// COMMON-LABEL: entry: +// AMDGPU: %block_sizes = alloca [1 x i64] +// SPIR32: %block_sizes = alloca [1 x i32] +// SPIR64: %block_sizes = alloca [1 x i64] +// COMMON-LABEL: if.then: +// COMMON-NOT: alloca +// CHECK-DEBUG: getelementptr {{.*}} %block_sizes, {{.*}} !dbg ![[TEMPLOCATION:[0-9]+]] +// COMMON-LABEL: if.end + queue_t default_queue; + unsigned flags = 0; + ndrange_t ndrange; + if (i) + enqueue_kernel(default_queue, flags, ndrange, ^(local void *a) { }, 32); +} + +// Check that the temporary is scoped to the `if` + +// CHECK-DEBUG: ![[TESTFILE:[0-9]+]] = !DIFile(filename: "<stdin>" +// CHECK-DEBUG: ![[TESTSCOPE:[0-9]+]] = distinct !DISubprogram(name: "test", {{.*}} file: ![[TESTFILE]] +// CHECK-DEBUG: ![[IFSCOPE:[0-9]+]] = distinct !DILexicalBlock(scope: ![[TESTSCOPE]], file: ![[TESTFILE]], line: 24) +// CHECK-DEBUG: ![[TEMPLOCATION]] = !DILocation(line: 25, scope: ![[IFSCOPE]]) diff --git a/test/CodeGenOpenCL/fpmath.cl b/test/CodeGenOpenCL/fpmath.cl index 8908861ace8e..0108d909c94e 100644 --- a/test/CodeGenOpenCL/fpmath.cl +++ b/test/CodeGenOpenCL/fpmath.cl @@ -16,7 +16,7 @@ float spscalardiv(float a, float b) { float4 spvectordiv(float4 a, float4 b) { // CHECK: @spvectordiv - // CHECK: #[[ATTR]] + // CHECK: #[[ATTR2:[0-9]+]] // CHECK: fdiv{{.*}}, // NODIVOPT: !fpmath ![[MD]] // DIVOPT-NOT: !fpmath ![[MD]] @@ -45,7 +45,11 @@ double dpscalardiv(double a, double b) { #endif // CHECK: attributes #[[ATTR]] = { -// NODIVOPT: "correctly-rounded-divide-sqrt-fp-math"="false" -// DIVOPT: "correctly-rounded-divide-sqrt-fp-math"="true" -// CHECK: } +// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false" +// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true" +// CHECK-SAME: } +// CHECK: attributes #[[ATTR2]] = { +// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false" +// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true" +// CHECK-SAME: } // NODIVOPT: ![[MD]] = !{float 2.500000e+00} diff --git a/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl b/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl new file mode 100644 index 000000000000..515f13f6e768 --- /dev/null +++ b/test/CodeGenOpenCL/intel-subgroups-avc-ext-types.cl @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -cl-ext=+cl_intel_device_side_avc_motion_estimation -emit-llvm -o - -O0 | FileCheck %s + +// CHECK: %opencl.intel_sub_group_avc_mce_payload_t = type opaque +// CHECK: %opencl.intel_sub_group_avc_ime_payload_t = type opaque +// CHECK: %opencl.intel_sub_group_avc_ref_payload_t = type opaque +// CHECK: %opencl.intel_sub_group_avc_sic_payload_t = type opaque + +// CHECK: %opencl.intel_sub_group_avc_mce_result_t = type opaque +// CHECK: %opencl.intel_sub_group_avc_ime_result_t = type opaque +// CHECK: %opencl.intel_sub_group_avc_ref_result_t = type opaque +// CHECK: %opencl.intel_sub_group_avc_sic_result_t = type opaque + +// CHECK: %opencl.intel_sub_group_avc_ime_result_single_reference_streamout_t = type opaque +// CHECK: %opencl.intel_sub_group_avc_ime_result_dual_reference_streamout_t = type opaque +// CHECK: %opencl.intel_sub_group_avc_ime_single_reference_streamin_t = type opaque +// CHECK: %opencl.intel_sub_group_avc_ime_dual_reference_streamin_t = type opaque + +// CHECK: store %opencl.intel_sub_group_avc_ime_payload_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ref_payload_t* null, +// CHECK: store %opencl.intel_sub_group_avc_sic_payload_t* null, + +// CHECK: store %opencl.intel_sub_group_avc_ime_result_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ref_result_t* null, +// CHECK: store %opencl.intel_sub_group_avc_sic_result_t* null, + +// CHECK: store %opencl.intel_sub_group_avc_ime_result_single_reference_streamout_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ime_result_dual_reference_streamout_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ime_single_reference_streamin_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ime_dual_reference_streamin_t* null, +// +// CHECK: store %opencl.intel_sub_group_avc_ime_payload_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ref_payload_t* null, +// CHECK: store %opencl.intel_sub_group_avc_sic_payload_t* null, + +// CHECK: store %opencl.intel_sub_group_avc_ime_result_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ref_result_t* null, +// CHECK: store %opencl.intel_sub_group_avc_sic_result_t* null, + +// CHECK: store %opencl.intel_sub_group_avc_ime_result_single_reference_streamout_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ime_result_dual_reference_streamout_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ime_single_reference_streamin_t* null, +// CHECK: store %opencl.intel_sub_group_avc_ime_dual_reference_streamin_t* null, + +#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : enable + +// Using 0x0 directly allows us not to include opencl-c.h header and not to +// redefine all of these CLK_AVC_*_INTITIALIZE_INTEL macro. '0x0' value must +// be in sync with ones defined in opencl-c.h + +void foo() { + intel_sub_group_avc_mce_payload_t payload_mce; // No literal initializer for mce types + intel_sub_group_avc_ime_payload_t payload_ime = 0x0; + intel_sub_group_avc_ref_payload_t payload_ref = 0x0; + intel_sub_group_avc_sic_payload_t payload_sic = 0x0; + + intel_sub_group_avc_mce_result_t result_mce; // No literal initializer for mce types + intel_sub_group_avc_ime_result_t result_ime = 0x0; + intel_sub_group_avc_ref_result_t result_ref = 0x0; + intel_sub_group_avc_sic_result_t result_sic = 0x0; + + intel_sub_group_avc_ime_result_single_reference_streamout_t sstreamout = 0x0; + intel_sub_group_avc_ime_result_dual_reference_streamout_t dstreamout = 0x0; + intel_sub_group_avc_ime_single_reference_streamin_t sstreamin = 0x0; + intel_sub_group_avc_ime_dual_reference_streamin_t dstreamin = 0x0; + + // Initialization with initializer list was supported in the first version + // of the extension. So we check for backward compatibility here. + intel_sub_group_avc_ime_payload_t payload_ime_list = {0}; + intel_sub_group_avc_ref_payload_t payload_ref_list = {0}; + intel_sub_group_avc_sic_payload_t payload_sic_list = {0}; + + intel_sub_group_avc_ime_result_t result_ime_list = {0}; + intel_sub_group_avc_ref_result_t result_ref_list = {0}; + intel_sub_group_avc_sic_result_t result_sic_list = {0}; + + intel_sub_group_avc_ime_result_single_reference_streamout_t sstreamout_list = {0}; + intel_sub_group_avc_ime_result_dual_reference_streamout_t dstreamout_list = {0}; + intel_sub_group_avc_ime_single_reference_streamin_t sstreamin_list = {0}; + intel_sub_group_avc_ime_dual_reference_streamin_t dstreamin_list = {0}; +} + diff --git a/test/CodeGenOpenCL/numbered-address-space.cl b/test/CodeGenOpenCL/numbered-address-space.cl new file mode 100644 index 000000000000..dbaba8747678 --- /dev/null +++ b/test/CodeGenOpenCL/numbered-address-space.cl @@ -0,0 +1,34 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tonga -S -emit-llvm -O0 -o - %s | FileCheck %s + +// Make sure using numbered address spaces doesn't trigger crashes when a +// builtin has an address space parameter. + +// CHECK-LABEL: @test_numbered_as_to_generic( +// CHECK: addrspacecast i32 addrspace(42)* %0 to i32* +void test_numbered_as_to_generic(__attribute__((address_space(42))) int *arbitary_numbered_ptr) { + generic int* generic_ptr = arbitary_numbered_ptr; + *generic_ptr = 4; +} + +// CHECK-LABEL: @test_numbered_as_to_builtin( +// CHECK: addrspacecast i32 addrspace(42)* %0 to float addrspace(3)* +void test_numbered_as_to_builtin(__attribute__((address_space(42))) int *arbitary_numbered_ptr, float src) { + volatile float result = __builtin_amdgcn_ds_fmaxf(arbitary_numbered_ptr, src, 0, 0, false); +} + +// CHECK-LABEL: @test_generic_as_to_builtin_parameter_explicit_cast( +// CHECK: addrspacecast i32 addrspace(3)* %0 to i32* +void test_generic_as_to_builtin_parameter_explicit_cast(__local int *local_ptr, float src) { + generic int* generic_ptr = local_ptr; + volatile float result = __builtin_amdgcn_ds_fmaxf((__local float*) generic_ptr, src, 0, 0, false); +} + +// CHECK-LABEL: @test_generic_as_to_builtin_parameter_implicit_cast( +// CHECK: addrspacecast i32* %2 to float addrspace(3)* +void test_generic_as_to_builtin_parameter_implicit_cast(__local int *local_ptr, float src) { + generic int* generic_ptr = local_ptr; + + volatile float result = __builtin_amdgcn_ds_fmaxf(generic_ptr, src, 0, 0, false); +} + diff --git a/test/CodeGenOpenCL/partial_initializer.cl b/test/CodeGenOpenCL/partial_initializer.cl index ee6be919a7fa..4e8299584135 100644 --- a/test/CodeGenOpenCL/partial_initializer.cl +++ b/test/CodeGenOpenCL/partial_initializer.cl @@ -24,7 +24,7 @@ int4 GV1 = (int4)((int2)(1,2),3,4); // CHECK: @GV2 = addrspace(1) global <4 x i32> <i32 1, i32 1, i32 1, i32 1>, align 16 int4 GV2 = (int4)(1); -// CHECK: @f.S = private unnamed_addr addrspace(2) constant %struct.StrucTy { i32 1, i32 2, i32 0 }, align 4 +// CHECK: @__const.f.S = private unnamed_addr addrspace(2) constant %struct.StrucTy { i32 1, i32 2, i32 0 }, align 4 // CHECK-LABEL: define spir_func void @f() void f(void) { @@ -46,7 +46,7 @@ void f(void) { float A[6][6] = {1.0f, 2.0f}; // CHECK: %[[v5:.*]] = bitcast %struct.StrucTy* %S to i8* - // CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[v5]], i8 addrspace(2)* align 4 bitcast (%struct.StrucTy addrspace(2)* @f.S to i8 addrspace(2)*), i32 12, i1 false) + // CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[v5]], i8 addrspace(2)* align 4 bitcast (%struct.StrucTy addrspace(2)* @__const.f.S to i8 addrspace(2)*), i32 12, i1 false) StrucTy S = {1, 2}; // CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>* %[[compoundliteral1]], align 8 diff --git a/test/CodeGenOpenCL/pipe_builtin.cl b/test/CodeGenOpenCL/pipe_builtin.cl index d912fce5e95d..2a533c54c13c 100644 --- a/test/CodeGenOpenCL/pipe_builtin.cl +++ b/test/CodeGenOpenCL/pipe_builtin.cl @@ -69,25 +69,3 @@ void test8(write_only pipe int p, global int *ptr) { // CHECK: call i32 @__get_pipe_max_packets_wo(%opencl.pipe_wo_t* %{{.*}}, i32 4, i32 4) *ptr = get_pipe_max_packets(p); } - -void test9(read_only pipe int r, write_only pipe int w, global int *ptr) { - // verify that return type is correctly casted to i1 value - // CHECK: %[[R:[0-9]+]] = call i32 @__read_pipe_2 - // CHECK: icmp ne i32 %[[R]], 0 - if (read_pipe(r, ptr)) *ptr = -1; - // CHECK: %[[W:[0-9]+]] = call i32 @__write_pipe_2 - // CHECK: icmp ne i32 %[[W]], 0 - if (write_pipe(w, ptr)) *ptr = -1; - // CHECK: %[[NR:[0-9]+]] = call i32 @__get_pipe_num_packets_ro - // CHECK: icmp ne i32 %[[NR]], 0 - if (get_pipe_num_packets(r)) *ptr = -1; - // CHECK: %[[NW:[0-9]+]] = call i32 @__get_pipe_num_packets_wo - // CHECK: icmp ne i32 %[[NW]], 0 - if (get_pipe_num_packets(w)) *ptr = -1; - // CHECK: %[[MR:[0-9]+]] = call i32 @__get_pipe_max_packets_ro - // CHECK: icmp ne i32 %[[MR]], 0 - if (get_pipe_max_packets(r)) *ptr = -1; - // CHECK: %[[MW:[0-9]+]] = call i32 @__get_pipe_max_packets_wo - // CHECK: icmp ne i32 %[[MW]], 0 - if (get_pipe_max_packets(w)) *ptr = -1; -} diff --git a/test/CodeGenOpenCL/printf.cl b/test/CodeGenOpenCL/printf.cl new file mode 100644 index 000000000000..346f6c35bae4 --- /dev/null +++ b/test/CodeGenOpenCL/printf.cl @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=FP64,ALL %s +// RUN: %clang_cc1 -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spir-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck -check-prefixes=NOFP64,ALL %s + +typedef __attribute__((ext_vector_type(2))) float float2; +typedef __attribute__((ext_vector_type(2))) half half2; + +#ifdef cl_khr_fp64 +typedef __attribute__((ext_vector_type(2))) double double2; +#endif + +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); + + +// ALL-LABEL: @test_printf_float2( +// FP64: %conv = fpext <2 x float> %0 to <2 x double> +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) + +// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) +kernel void test_printf_float2(float2 arg) { + printf("%v2f", arg); +} + +// ALL-LABEL: @test_printf_half2( +// FP64: %conv = fpext <2 x half> %0 to <2 x double> +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %conv) #2 + +// NOFP64: %conv = fpext <2 x half> %0 to <2 x float> +// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %conv) #2 +kernel void test_printf_half2(half2 arg) { + printf("%v2f", arg); +} + +#ifdef cl_khr_fp64 +// FP64-LABEL: @test_printf_double2( +// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([5 x i8], [5 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x double> %0) #2 +kernel void test_printf_double2(double2 arg) { + printf("%v2f", arg); +} +#endif diff --git a/test/CodeGenOpenCL/private-array-initialization.cl b/test/CodeGenOpenCL/private-array-initialization.cl index 9aa058dcfacf..420270de1933 100644 --- a/test/CodeGenOpenCL/private-array-initialization.cl +++ b/test/CodeGenOpenCL/private-array-initialization.cl @@ -6,11 +6,11 @@ void test() { __private int arr[] = {1, 2, 3}; // PRIVATE0: %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8* -// PRIVATE0: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[arr_i8_ptr]], i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i1 false) +// PRIVATE0: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[arr_i8_ptr]], i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @__const.test.arr to i8 addrspace(2)*), i32 12, i1 false) // PRIVATE5: %arr = alloca [3 x i32], align 4, addrspace(5) // PRIVATE5: %0 = bitcast [3 x i32] addrspace(5)* %arr to i8 addrspace(5)* -// PRIVATE5: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %0, i8 addrspace(4)* align 4 bitcast ([3 x i32] addrspace(4)* @test.arr to i8 addrspace(4)*), i64 12, i1 false) +// PRIVATE5: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %0, i8 addrspace(4)* align 4 bitcast ([3 x i32] addrspace(4)* @__const.test.arr to i8 addrspace(4)*), i64 12, i1 false) } __kernel void initializer_cast_is_valid_crash() { |
