aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGenOpenCL
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2018-07-28 11:06:01 +0000
committerDimitry Andric <dim@FreeBSD.org>2018-07-28 11:06:01 +0000
commit486754660bb926339aefcf012a3f848592babb8b (patch)
treeecdbc446c9876f4f120f701c243373cd3cb43db3 /test/CodeGenOpenCL
parent55e6d896ad333f07bb3b1ba487df214fc268a4ab (diff)
Notes
Diffstat (limited to 'test/CodeGenOpenCL')
-rw-r--r--test/CodeGenOpenCL/addr-space-struct-arg.cl103
-rw-r--r--test/CodeGenOpenCL/address-space-constant-initializers.cl21
-rw-r--r--test/CodeGenOpenCL/address-spaces.cl46
-rw-r--r--test/CodeGenOpenCL/amdgcn-automatic-variable.cl2
-rw-r--r--test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl42
-rw-r--r--test/CodeGenOpenCL/amdgpu-alignment.cl168
-rw-r--r--test/CodeGenOpenCL/amdgpu-attrs.cl55
-rw-r--r--test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl26
-rw-r--r--test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl29
-rw-r--r--test/CodeGenOpenCL/amdgpu-env-amdgcn.cl5
-rw-r--r--test/CodeGenOpenCL/amdgpu-env-amdgiz.cl9
-rw-r--r--test/CodeGenOpenCL/amdgpu-features.cl12
-rw-r--r--test/CodeGenOpenCL/amdgpu-nullptr.cl166
-rw-r--r--test/CodeGenOpenCL/blocks.cl66
-rw-r--r--test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl25
-rw-r--r--test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl36
-rw-r--r--test/CodeGenOpenCL/builtins-amdgcn-vi.cl17
-rw-r--r--test/CodeGenOpenCL/builtins-amdgcn.cl12
-rw-r--r--test/CodeGenOpenCL/byval.cl8
-rw-r--r--test/CodeGenOpenCL/cast_image.cl2
-rw-r--r--test/CodeGenOpenCL/cl-uniform-wg-size.cl16
-rw-r--r--test/CodeGenOpenCL/cl20-device-side-enqueue.cl119
-rw-r--r--test/CodeGenOpenCL/convergent.cl5
-rw-r--r--test/CodeGenOpenCL/denorms-are-zero.cl32
-rw-r--r--test/CodeGenOpenCL/half.cl1
-rw-r--r--test/CodeGenOpenCL/inline-asm-amdgcn.cl8
-rw-r--r--test/CodeGenOpenCL/kernel-attributes.cl6
-rw-r--r--test/CodeGenOpenCL/kernel-metadata.cl2
-rw-r--r--test/CodeGenOpenCL/lifetime.cl4
-rw-r--r--test/CodeGenOpenCL/opencl_types.cl41
-rw-r--r--test/CodeGenOpenCL/partial_initializer.cl14
-rw-r--r--test/CodeGenOpenCL/pipe_builtin.cl66
-rw-r--r--test/CodeGenOpenCL/pipe_types.cl19
-rw-r--r--test/CodeGenOpenCL/private-array-initialization.cl30
-rw-r--r--test/CodeGenOpenCL/shifts.cl15
-rw-r--r--test/CodeGenOpenCL/size_t.cl51
-rw-r--r--test/CodeGenOpenCL/str_literals.cl16
-rw-r--r--test/CodeGenOpenCL/vla.cl13
38 files changed, 783 insertions, 525 deletions
diff --git a/test/CodeGenOpenCL/addr-space-struct-arg.cl b/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 68fc8035b4cc..f8d7073f92ee 100644
--- a/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck -enable-var-scope -check-prefixes=COM,AMD %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s
typedef struct {
int cells[9];
@@ -35,9 +36,12 @@ struct LargeStructTwoMember {
int2 y[20];
};
+#if __OPENCL_C_VERSION__ >= 200
+struct LargeStructOneMember g_s;
+#endif
// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval align 4 %in)
-// AMD-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
+// AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
Mat4X4 out;
return out;
@@ -49,15 +53,15 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
-// AMD: load [9 x i32], [9 x i32] addrspace(1)*
-// AMD: call %struct.Mat4X4 @foo([9 x i32]
-// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
+// AMDGCN: load [9 x i32], [9 x i32] addrspace(1)*
+// AMDGCN: call %struct.Mat4X4 @foo([9 x i32]
+// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
out[0] = foo(in[1]);
}
// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval align 4 %in)
-// AMD-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
+// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval align 4 %in)
Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
Mat64X64 out;
return out;
@@ -68,66 +72,97 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
// the return value.
// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
-// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
-// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
+// AMDGCN: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
+// AMDGCN: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
out[0] = foo_large(in[1]);
}
-// AMD-LABEL: define void @FuncOneMember(<2 x i32> %u.coerce)
+// AMDGCN-LABEL: define void @FuncOneMember(<2 x i32> %u.coerce)
void FuncOneMember(struct StructOneMember u) {
u.x = (int2)(0, 0);
}
-// AMD-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMDGCN-NOT: addrspacecast
+// AMDGCN: store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
void FuncOneLargeMember(struct LargeStructOneMember u) {
u.x[0] = (int2)(0, 0);
}
-// AMD-LABEL: define amdgpu_kernel void @KernelOneMember
-// AMD-SAME: (<2 x i32> %[[u_coerce:.*]])
-// AMD: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
-// AMD: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
-// AMD: store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
-// AMD: call void @FuncOneMember(<2 x i32>
+// AMDGCN20-LABEL: define void @test_indirect_arg_globl()
+// AMDGCN20: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN20: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
+// AMDGCN20: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(1)* align 8 bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i1 false)
+// AMDGCN20: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+#if __OPENCL_C_VERSION__ >= 200
+void test_indirect_arg_globl(void) {
+ FuncOneLargeMember(g_s);
+}
+#endif
+
+// AMDGCN-LABEL: define amdgpu_kernel void @test_indirect_arg_local()
+// AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
+// AMDGCN: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* align 8 %[[r0]], i8 addrspace(3)* align 8 bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i1 false)
+// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+kernel void test_indirect_arg_local(void) {
+ local struct LargeStructOneMember l_s;
+ FuncOneLargeMember(l_s);
+}
+
+// AMDGCN-LABEL: define void @test_indirect_arg_private()
+// AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN-NOT: @llvm.memcpy
+// AMDGCN-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[p_s]])
+void test_indirect_arg_private(void) {
+ struct LargeStructOneMember p_s;
+ FuncOneLargeMember(p_s);
+}
+
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelOneMember
+// AMDGCN-SAME: (<2 x i32> %[[u_coerce:.*]])
+// AMDGCN: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
+// AMDGCN: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
+// AMDGCN: store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]]
+// AMDGCN: call void @FuncOneMember(<2 x i32>
kernel void KernelOneMember(struct StructOneMember u) {
FuncOneMember(u);
}
-// AMD-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
-// AMD: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
-// AMD: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
-// AMD: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
+// AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMDGCN: store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
+// AMDGCN: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[U]])
kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
FuncOneLargeMember(u);
}
-// AMD-LABEL: define void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
+// AMDGCN-LABEL: define void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
void FuncTwoMember(struct StructTwoMember u) {
u.y = (int2)(0, 0);
}
-// AMD-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
+// AMDGCN-LABEL: define void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %u)
void FuncLargeTwoMember(struct LargeStructTwoMember u) {
u.y[0] = (int2)(0, 0);
}
-
-// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember
-// AMD-SAME: (%struct.StructTwoMember %[[u_coerce:.*]])
-// AMD: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
-// AMD: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
-// AMD: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
-// AMD: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelTwoMember
+// AMDGCN-SAME: (%struct.StructTwoMember %[[u_coerce:.*]])
+// AMDGCN: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
+// AMDGCN: %[[LD0:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
+// AMDGCN: %[[LD1:.*]] = load <2 x i32>, <2 x i32> addrspace(5)*
+// AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
kernel void KernelTwoMember(struct StructTwoMember u) {
FuncTwoMember(u);
}
-// AMD-LABEL: define amdgpu_kernel void @KernelLargeTwoMember
-// AMD-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]])
-// AMD: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
-// AMD: store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
-// AMD: call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
+// AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeTwoMember
+// AMDGCN-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]])
+// AMDGCN: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
+// AMDGCN: store %struct.LargeStructTwoMember %[[u_coerce]], %struct.LargeStructTwoMember addrspace(5)* %[[u]]
+// AMDGCN: call void @FuncLargeTwoMember(%struct.LargeStructTwoMember addrspace(5)* byval align 8 %[[u]])
kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
FuncLargeTwoMember(u);
}
diff --git a/test/CodeGenOpenCL/address-space-constant-initializers.cl b/test/CodeGenOpenCL/address-space-constant-initializers.cl
index a03d939fae54..da971cfee383 100644
--- a/test/CodeGenOpenCL/address-space-constant-initializers.cl
+++ b/test/CodeGenOpenCL/address-space-constant-initializers.cl
@@ -1,6 +1,5 @@
-// RUN: %clang_cc1 %s -ffake-address-space-map -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -ffake-address-space-map -emit-llvm -o - | FileCheck -check-prefix=FAKE %s
+// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck -check-prefix=AMDGCN %s
typedef struct {
int i;
@@ -13,10 +12,22 @@ typedef struct {
__constant float* constant_float_ptr;
} ConstantArrayPointerStruct;
-// CHECK: %struct.ConstantArrayPointerStruct = type { float addrspace(2)* }
-// CHECK: addrspace(2) constant %struct.ConstantArrayPointerStruct { float addrspace(2)* bitcast (i8 addrspace(2)* getelementptr (i8, i8 addrspace(2)* bitcast (%struct.ArrayStruct addrspace(2)* @constant_array_struct to i8 addrspace(2)*), i64 4) to float addrspace(2)*) }
+// FAKE: %struct.ConstantArrayPointerStruct = type { float addrspace(2)* }
+// FAKE: addrspace(2) constant %struct.ConstantArrayPointerStruct { float addrspace(2)* bitcast (i8 addrspace(2)* getelementptr (i8, i8 addrspace(2)* bitcast (%struct.ArrayStruct addrspace(2)* @constant_array_struct to i8 addrspace(2)*), i64 4) to float addrspace(2)*) }
+// AMDGCN: %struct.ConstantArrayPointerStruct = type { float addrspace(4)* }
+// AMDGCN: addrspace(4) constant %struct.ConstantArrayPointerStruct { float addrspace(4)* bitcast (i8 addrspace(4)* getelementptr (i8, i8 addrspace(4)* bitcast (%struct.ArrayStruct addrspace(4)* @constant_array_struct to i8 addrspace(4)*), i64 4) to float addrspace(4)*) }
// Bug 18567
__constant ConstantArrayPointerStruct constant_array_pointer_struct = {
&constant_array_struct.f
};
+__kernel void initializer_cast_is_valid_crash()
+{
+ unsigned char v512[64] = {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
+ };
+
+}
diff --git a/test/CodeGenOpenCL/address-spaces.cl b/test/CodeGenOpenCL/address-spaces.cl
index cb641f593711..60f5e30d328b 100644
--- a/test/CodeGenOpenCL/address-spaces.cl
+++ b/test/CodeGenOpenCL/address-spaces.cl
@@ -1,11 +1,9 @@
// RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
// RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa-opencl -emit-llvm -o - | FileCheck --check-prefixes=CHECK,SPIR %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa-opencl -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | FileCheck %s -check-prefixes=CHECK,GIZ
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa-amdgizcl -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20GIZ
-// RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,SPIR %s
-// RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,SPIR %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -DCL20 -cl-std=CL2.0 -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20AMDGCN
+// RUN: %clang_cc1 %s -O0 -triple amdgcn-mesa-mesa3d -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
+// RUN: %clang_cc1 %s -O0 -triple r600-- -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
// SPIR: %struct.S = type { i32, i32, i32* }
// CL20SPIR: %struct.S = type { i32, i32, i32 addrspace(4)* }
@@ -15,18 +13,18 @@ struct S {
int *z;
};
-// CL20-DAG: @g_extern_var = external addrspace(1) global float
-// CL20-DAG: @l_extern_var = external addrspace(1) global float
+// CL20-DAG: @g_extern_var = external {{(dso_local )?}}addrspace(1) global float
+// CL20-DAG: @l_extern_var = external {{(dso_local )?}}addrspace(1) global float
// CL20-DAG: @test_static.l_static_var = internal addrspace(1) global float 0.000000e+00
// CL20-DAG: @g_static_var = internal addrspace(1) global float 0.000000e+00
#ifdef CL20
-// CL20-DAG: @g_s = common addrspace(1) global %struct.S zeroinitializer
+// CL20-DAG: @g_s = common {{(dso_local )?}}addrspace(1) global %struct.S zeroinitializer
struct S g_s;
#endif
// SPIR: i32* %arg
-// GIZ: i32 addrspace(5)* %arg
+// AMDGCN: i32 addrspace(5)* %arg
void f__p(__private int *arg) {}
// CHECK: i32 addrspace(1)* %arg
@@ -35,11 +33,12 @@ void f__g(__global int *arg) {}
// CHECK: i32 addrspace(3)* %arg
void f__l(__local int *arg) {}
-// CHECK: i32 addrspace(2)* %arg
+// SPIR: i32 addrspace(2)* %arg
+// AMDGCN: i32 addrspace(4)* %arg
void f__c(__constant int *arg) {}
// SPIR: i32* %arg
-// GIZ: i32 addrspace(5)* %arg
+// AMDGCN: i32 addrspace(5)* %arg
void fp(private int *arg) {}
// CHECK: i32 addrspace(1)* %arg
@@ -48,28 +47,29 @@ void fg(global int *arg) {}
// CHECK: i32 addrspace(3)* %arg
void fl(local int *arg) {}
-// CHECK: i32 addrspace(2)* %arg
+// SPIR: i32 addrspace(2)* %arg
+// AMDGCN: i32 addrspace(4)* %arg
void fc(constant int *arg) {}
#ifdef CL20
int i;
-// CL20-DAG: @i = common addrspace(1) global i32 0
+// CL20-DAG: @i = common {{(dso_local )?}}addrspace(1) global i32 0
int *ptr;
-// CL20SPIR-DAG: @ptr = common addrspace(1) global i32 addrspace(4)* null
-// CL20GIZ-DAG: @ptr = common addrspace(1) global i32* null
+// CL20SPIR-DAG: @ptr = common {{(dso_local )?}}addrspace(1) global i32 addrspace(4)* null
+// CL20AMDGCN-DAG: @ptr = common {{(dso_local )?}}addrspace(1) global i32* null
#endif
// SPIR: i32* %arg
-// GIZ: i32 addrspace(5)* %arg
+// AMDGCN: i32 addrspace(5)* %arg
// CL20SPIR-DAG: i32 addrspace(4)* %arg
-// CL20GIZ-DAG: i32* %arg
+// CL20AMDGCN-DAG: i32* %arg
void f(int *arg) {
int i;
// SPIR: %i = alloca i32,
-// GIZ: %i = alloca i32{{.*}}addrspace(5)
+// AMDGCN: %i = alloca i32{{.*}}addrspace(5)
// CL20SPIR-DAG: %i = alloca i32,
-// CL20GIZ-DAG: %i = alloca i32{{.*}}addrspace(5)
+// CL20AMDGCN-DAG: %i = alloca i32{{.*}}addrspace(5)
#ifdef CL20
static int ii;
@@ -79,13 +79,13 @@ void f(int *arg) {
typedef int int_td;
typedef int *intp_td;
-// SPIR: define void @test_typedef(i32 addrspace(1)* %x, i32 addrspace(2)* %y, i32* %z)
+// SPIR: define {{(dso_local )?}}void @test_typedef(i32 addrspace(1)* %x, i32 addrspace(2)* %y, i32* %z)
void test_typedef(global int_td *x, constant int_td *y, intp_td z) {
*x = *y;
*z = 0;
}
-// SPIR: define void @test_struct()
+// SPIR: define {{(dso_local )?}}void @test_struct()
void test_struct() {
// SPIR: %ps = alloca %struct.S*
// CL20SPIR: %ps = alloca %struct.S addrspace(4)*
@@ -99,7 +99,7 @@ void test_struct() {
#endif
}
-// SPIR-LABEL: define void @test_void_par()
+// SPIR-LABEL: define {{(dso_local )?}}void @test_void_par()
void test_void_par(void) {}
// On ppc64 returns signext i32.
diff --git a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
index fefe1c4a41d5..59f38f80dca4 100644
--- a/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
+++ b/test/CodeGenOpenCL/amdgcn-automatic-variable.cl
@@ -62,7 +62,7 @@ void func2(void) {
// CHECK-LABEL: define void @func3()
// CHECK: %a = alloca [16 x [1 x float]], align 4, addrspace(5)
// CHECK: %[[CAST:.+]] = bitcast [16 x [1 x float]] addrspace(5)* %a to i8 addrspace(5)*
-// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* %[[CAST]], i8 0, i64 64, i32 4, i1 false)
+// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 4 %[[CAST]], i8 0, i64 64, i1 false)
void func3(void) {
float a[16][1] = {{0.}};
}
diff --git a/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 21bdb15b094d..aec00e76014e 100644
--- a/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -1,6 +1,6 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-amdgiz -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
-// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,R600 %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
typedef __attribute__(( ext_vector_type(2) )) char char2;
typedef __attribute__(( ext_vector_type(3) )) char char3;
@@ -309,8 +309,7 @@ void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t ar
// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1)
void func_different_size_type_pair_arg(different_size_type_pair arg1) { }
-// AMDGCN: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
-// R600: void @func_flexible_array_arg(%struct.flexible_array* byval nocapture align 4 %arg)
+// CHECK: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg)
void func_flexible_array_arg(flexible_array arg) { }
// CHECK: define float @func_f32_ret()
@@ -405,16 +404,14 @@ struct_arr16 func_ret_struct_arr16()
return s;
}
-// AMDGCN: define void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret %agg.result)
-// R600: define void @func_ret_struct_arr32(%struct.struct_arr32* noalias nocapture sret %agg.result)
+// CHECK: define void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret %agg.result)
struct_arr32 func_ret_struct_arr32()
{
struct_arr32 s = { 0 };
return s;
}
-// AMDGCN: define void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret %agg.result)
-// R600: define void @func_ret_struct_arr33(%struct.struct_arr33* noalias nocapture sret %agg.result)
+// CHECK: define void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret %agg.result)
struct_arr33 func_ret_struct_arr33()
{
struct_arr33 s = { 0 };
@@ -428,7 +425,7 @@ struct_char_arr32 func_ret_struct_char_arr32()
return s;
}
-// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #0 {
+// CHECK: define i32 @func_transparent_union_ret() local_unnamed_addr #1 {
// CHECK: ret i32 0
transparent_u func_transparent_union_ret()
{
@@ -443,8 +440,7 @@ different_size_type_pair func_different_size_type_pair_ret()
return s;
}
-// AMDGCN: define void @func_flexible_array_ret(%struct.flexible_array addrspace(5)* noalias nocapture sret %agg.result)
-// R600: define void @func_flexible_array_ret(%struct.flexible_array* noalias nocapture sret %agg.result)
+// CHECK: define void @func_flexible_array_ret(%struct.flexible_array addrspace(5)* noalias nocapture sret %agg.result)
flexible_array func_flexible_array_ret()
{
flexible_array s = { 0 };
@@ -454,13 +450,11 @@ flexible_array func_flexible_array_ret()
// CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2)
void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { }
-// AMDGCN: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
-// R600: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg* byval nocapture align 4 %s)
+// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s)
void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { }
// XXX - Why don't the inner structs flatten?
-// AMDGCN: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
-// R600: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct* byval nocapture align 8 %arg4)
+// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4)
void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { }
// CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2)
@@ -475,8 +469,7 @@ double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) {
// CHECK: define void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14)
void func_large_struct_padding_arg_direct(large_struct_padding arg) { }
-// AMDGCN: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
-// R600: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding* byval nocapture readonly align 8 %arg)
+// CHECK: define void @func_large_struct_padding_arg_store(%struct.large_struct_padding addrspace(1)* nocapture %out, %struct.large_struct_padding addrspace(5)* byval nocapture readonly align 8 %arg)
void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) {
*out = arg;
}
@@ -486,8 +479,7 @@ void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t ar
// Function signature from blender, nothing should be passed byval. The v3i32
// should not count as 4 passed registers.
-// AMDGCN: define void @v3i32_pair_reg_count(%struct.int3_pair addrspace(5)* nocapture %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> %arg4, float %arg5)
-// R600: define void @v3i32_pair_reg_count(%struct.int3_pair* nocapture %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> %arg4, float %arg5)
+// CHECK: define void @v3i32_pair_reg_count(%struct.int3_pair addrspace(5)* nocapture %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> %arg4, float %arg5)
void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair arg3, int3 arg4, float arg5) { }
// Each short4 should fit pack into 2 registers.
@@ -495,8 +487,7 @@ void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair
void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
short4 arg4, short4 arg5, struct_4regs arg6) { }
-// AMDGCN: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
-// R600: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
+// CHECK: define void @v4i16_pair_reg_count_over(<4 x i16> %arg0, <4 x i16> %arg1, <4 x i16> %arg2, <4 x i16> %arg3, <4 x i16> %arg4, <4 x i16> %arg5, <4 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3,
short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { }
@@ -504,8 +495,7 @@ void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg
void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
short3 arg4, short3 arg5, struct_4regs arg6) { }
-// AMDGCN: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
-// R600: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
+// CHECK: define void @v3i16_reg_count_over(<3 x i16> %arg0, <3 x i16> %arg1, <3 x i16> %arg2, <3 x i16> %arg3, <3 x i16> %arg4, <3 x i16> %arg5, <3 x i16> %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3,
short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { }
@@ -515,8 +505,7 @@ void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
short2 arg8, short2 arg9, short2 arg10, short2 arg11,
struct_4regs arg13) { }
-// AMDGCN: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
-// R600: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs* byval nocapture align 4 %arg13)
+// CHECK: define void @v2i16_reg_count_over(<2 x i16> %arg0, <2 x i16> %arg1, <2 x i16> %arg2, <2 x i16> %arg3, <2 x i16> %arg4, <2 x i16> %arg5, <2 x i16> %arg6, <2 x i16> %arg7, <2 x i16> %arg8, <2 x i16> %arg9, <2 x i16> %arg10, <2 x i16> %arg11, <2 x i16> %arg12, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg13)
void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
short2 arg4, short2 arg5, short2 arg6, short2 arg7,
short2 arg8, short2 arg9, short2 arg10, short2 arg11,
@@ -526,8 +515,7 @@ void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3,
void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, struct_4regs arg6) { }
-// AMDGCN: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
-// R600: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs* byval nocapture align 4 %arg7)
+// CHECK: define void @v2i8_reg_count_over(<2 x i8> %arg0, <2 x i8> %arg1, <2 x i8> %arg2, <2 x i8> %arg3, <2 x i8> %arg4, <2 x i8> %arg5, i32 %arg6, %struct.struct_4regs addrspace(5)* byval nocapture align 4 %arg7)
void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3,
char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { }
diff --git a/test/CodeGenOpenCL/amdgpu-alignment.cl b/test/CodeGenOpenCL/amdgpu-alignment.cl
index 70a22c9f756c..b5dc47adbc2c 100644
--- a/test/CodeGenOpenCL/amdgpu-alignment.cl
+++ b/test/CodeGenOpenCL/amdgpu-alignment.cl
@@ -336,91 +336,91 @@ kernel void local_memory_alignment_arg(
}
// CHECK-LABEL: @private_memory_alignment_alloca(
-// CHECK: %private_i8 = alloca [4 x i8], align 1
-// CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2
-// CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4
-// CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4
-// CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8
-// CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16
-// CHECK: %private_i16 = alloca [4 x i16], align 2
-// CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4
-// CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8
-// CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8
-// CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16
-// CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32
-// CHECK: %private_i32 = alloca [4 x i32], align 4
-// CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8
-// CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16
-// CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16
-// CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32
-// CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64
-// CHECK: %private_i64 = alloca [4 x i64], align 8
-// CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16
-// CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32
-// CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32
-// CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64
-// CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128
-// CHECK: %private_f16 = alloca [4 x half], align 2
-// CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4
-// CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8
-// CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8
-// CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16
-// CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32
-// CHECK: %private_f32 = alloca [4 x float], align 4
-// CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8
-// CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16
-// CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16
-// CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32
-// CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64
-// CHECK: %private_f64 = alloca [4 x double], align 8
-// CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16
-// CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32
-// CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32
-// CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64
-// CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128
+// CHECK: %private_i8 = alloca [4 x i8], align 1, addrspace(5)
+// CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2, addrspace(5)
+// CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4, addrspace(5)
+// CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4, addrspace(5)
+// CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8, addrspace(5)
+// CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16, addrspace(5)
+// CHECK: %private_i16 = alloca [4 x i16], align 2, addrspace(5)
+// CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4, addrspace(5)
+// CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8, addrspace(5)
+// CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8, addrspace(5)
+// CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16, addrspace(5)
+// CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32, addrspace(5)
+// CHECK: %private_i32 = alloca [4 x i32], align 4, addrspace(5)
+// CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8, addrspace(5)
+// CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16, addrspace(5)
+// CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16, addrspace(5)
+// CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32, addrspace(5)
+// CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64, addrspace(5)
+// CHECK: %private_i64 = alloca [4 x i64], align 8, addrspace(5)
+// CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16, addrspace(5)
+// CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32, addrspace(5)
+// CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32, addrspace(5)
+// CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64, addrspace(5)
+// CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128, addrspace(5)
+// CHECK: %private_f16 = alloca [4 x half], align 2, addrspace(5)
+// CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4, addrspace(5)
+// CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8, addrspace(5)
+// CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8, addrspace(5)
+// CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16, addrspace(5)
+// CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32, addrspace(5)
+// CHECK: %private_f32 = alloca [4 x float], align 4, addrspace(5)
+// CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8, addrspace(5)
+// CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16, addrspace(5)
+// CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16, addrspace(5)
+// CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32, addrspace(5)
+// CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64, addrspace(5)
+// CHECK: %private_f64 = alloca [4 x double], align 8, addrspace(5)
+// CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16, addrspace(5)
+// CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32, addrspace(5)
+// CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32, addrspace(5)
+// CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64, addrspace(5)
+// CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128, addrspace(5)
-// CHECK: store volatile i8 0, i8* %arraydecay, align 1
-// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8>* %arraydecay{{[0-9]+}}, align 2
-// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8>* %storetmp, align 4
-// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8>* %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile i16 0, i16* %arraydecay{{[0-9]+}}, align 2
-// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16>* %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16>* %storetmp{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile i32 0, i32* %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32>* %storetmp16, align 16
-// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32>* %arraydecay{{[0-9]+}}, align 64
-// CHECK: store volatile i64 0, i64* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64>* %storetmp23, align 32
-// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64>* %arraydecay{{[0-9]+}}, align 64
-// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64>* %arraydecay{{[0-9]+}}, align 128
-// CHECK: store volatile half 0xH0000, half* %arraydecay{{[0-9]+}}, align 2
-// CHECK: store volatile <2 x half> zeroinitializer, <2 x half>* %arraydecay{{[0-9]+}}, align 4
-// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half>* %storetmp{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x half> zeroinitializer, <4 x half>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <8 x half> zeroinitializer, <8 x half>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <16 x half> zeroinitializer, <16 x half>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile float 0.000000e+00, float* %arraydecay34, align 4
-// CHECK: store volatile <2 x float> zeroinitializer, <2 x float>* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float>* %storetmp{{[0-9]+}}, align 16
-// CHECK: store volatile <4 x float> zeroinitializer, <4 x float>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <8 x float> zeroinitializer, <8 x float>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile <16 x float> zeroinitializer, <16 x float>* %arraydecay{{[0-9]+}}, align 64
-// CHECK: store volatile double 0.000000e+00, double* %arraydecay{{[0-9]+}}, align 8
-// CHECK: store volatile <2 x double> zeroinitializer, <2 x double>* %arraydecay{{[0-9]+}}, align 16
-// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double>* %storetmp{{[0-9]+}}, align 32
-// CHECK: store volatile <4 x double> zeroinitializer, <4 x double>* %arraydecay{{[0-9]+}}, align 32
-// CHECK: store volatile <8 x double> zeroinitializer, <8 x double>* %arraydecay{{[0-9]+}}, align 64
-// CHECK: store volatile <16 x double> zeroinitializer, <16 x double>* %arraydecay{{[0-9]+}}, align 128
+// CHECK: store volatile i8 0, i8 addrspace(5)* %arraydecay, align 1
+// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 2
+// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(5)* %storetmp, align 4
+// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile i16 0, i16 addrspace(5)* %arraydecay{{[0-9]+}}, align 2
+// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(5)* %storetmp{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile i32 0, i32 addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(5)* %storetmp16, align 16
+// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile i64 0, i64 addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(5)* %storetmp23, align 32
+// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
+// CHECK: store volatile half 0xH0000, half addrspace(5)* %arraydecay{{[0-9]+}}, align 2
+// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 4
+// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(5)* %storetmp{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile float 0.000000e+00, float addrspace(5)* %arraydecay34, align 4
+// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(5)* %storetmp{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile double 0.000000e+00, double addrspace(5)* %arraydecay{{[0-9]+}}, align 8
+// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 16
+// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(5)* %storetmp{{[0-9]+}}, align 32
+// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 32
+// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 64
+// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(5)* %arraydecay{{[0-9]+}}, align 128
kernel void private_memory_alignment_alloca()
{
volatile private char private_i8[4];
diff --git a/test/CodeGenOpenCL/amdgpu-attrs.cl b/test/CodeGenOpenCL/amdgpu-attrs.cl
index 2696123f1434..ad13a2cae3c3 100644
--- a/test/CodeGenOpenCL/amdgpu-attrs.cl
+++ b/test/CodeGenOpenCL/amdgpu-attrs.cl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s -check-prefix=NONAMDHSA
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
__attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics
@@ -138,12 +139,18 @@ kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
// CHECK: define amdgpu_kernel void @reqd_work_group_size_32_2_1_flat_work_group_size_16_128() [[FLAT_WORK_GROUP_SIZE_16_128:#[0-9]+]]
}
+void a_function() {
+// CHECK: define void @a_function() [[A_FUNCTION:#[0-9]+]]
+}
+
// Make sure this is silently accepted on other targets.
// X86-NOT: "amdgpu-flat-work-group-size"
// X86-NOT: "amdgpu-waves-per-eu"
// X86-NOT: "amdgpu-num-vgpr"
// X86-NOT: "amdgpu-num-sgpr"
+// X86-NOT: "amdgpu-implicitarg-num-bytes"
+// NONAMDHSA-NOT: "amdgpu-implicitarg-num-bytes"
// CHECK-NOT: "amdgpu-flat-work-group-size"="0,0"
// CHECK-NOT: "amdgpu-waves-per-eu"="0"
@@ -151,28 +158,30 @@ kernel void reqd_work_group_size_32_2_1_flat_work_group_size_16_128() {
// CHECK-NOT: "amdgpu-num-sgpr"="0"
// CHECK-NOT: "amdgpu-num-vgpr"="0"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64" "amdgpu-implicitarg-num-bytes"="48"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128" "amdgpu-implicitarg-num-bytes"="48"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-implicitarg-num-bytes"="48" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[A_FUNCTION]] = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false"
diff --git a/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl b/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
index 4d46b40561ee..894611ea88e8 100644
--- a/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
+++ b/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
@@ -51,31 +51,31 @@ int *constant FileVar14 = 0;
kernel void kernel1(
// CHECK-DAG: ![[KERNELARG0:[0-9]+]] = !DILocalVariable(name: "KernelArg0", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)** {{.*}}, metadata ![[KERNELARG0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* {{.*}}, metadata ![[KERNELARG0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
global int *KernelArg0,
// CHECK-DAG: ![[KERNELARG1:[0-9]+]] = !DILocalVariable(name: "KernelArg1", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(2)** {{.*}}, metadata ![[KERNELARG1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)* addrspace(5)* {{.*}}, metadata ![[KERNELARG1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
constant int *KernelArg1,
// CHECK-DAG: ![[KERNELARG2:[0-9]+]] = !DILocalVariable(name: "KernelArg2", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)** {{.*}}, metadata ![[KERNELARG2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* {{.*}}, metadata ![[KERNELARG2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
local int *KernelArg2) {
private int *Tmp0;
int *Tmp1;
// CHECK-DAG: ![[FUNCVAR0:[0-9]+]] = !DILocalVariable(name: "FuncVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)** {{.*}}, metadata ![[FUNCVAR0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
global int *FuncVar0 = KernelArg0;
// CHECK-DAG: ![[FUNCVAR1:[0-9]+]] = !DILocalVariable(name: "FuncVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(2)** {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
constant int *FuncVar1 = KernelArg1;
// CHECK-DAG: ![[FUNCVAR2:[0-9]+]] = !DILocalVariable(name: "FuncVar2", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)** {{.*}}, metadata ![[FUNCVAR2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
local int *FuncVar2 = KernelArg2;
// CHECK-DAG: ![[FUNCVAR3:[0-9]+]] = !DILocalVariable(name: "FuncVar3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[FUNCVAR3]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(5)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR3]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
private int *FuncVar3 = Tmp0;
// CHECK-DAG: ![[FUNCVAR4:[0-9]+]] = !DILocalVariable(name: "FuncVar4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)** {{.*}}, metadata ![[FUNCVAR4]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* addrspace(5)* {{.*}}, metadata ![[FUNCVAR4]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
int *FuncVar4 = Tmp1;
// CHECK-DAG: ![[FUNCVAR5:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true)
@@ -111,18 +111,18 @@ kernel void kernel1(
int *local FuncVar14; FuncVar14 = Tmp1;
// CHECK-DAG: ![[FUNCVAR15:[0-9]+]] = !DILocalVariable(name: "FuncVar15", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)** {{.*}}, metadata ![[FUNCVAR15]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR15]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
global int *private FuncVar15 = KernelArg0;
// CHECK-DAG: ![[FUNCVAR16:[0-9]+]] = !DILocalVariable(name: "FuncVar16", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(2)** {{.*}}, metadata ![[FUNCVAR16]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR16]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
constant int *private FuncVar16 = KernelArg1;
// CHECK-DAG: ![[FUNCVAR17:[0-9]+]] = !DILocalVariable(name: "FuncVar17", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)** {{.*}}, metadata ![[FUNCVAR17]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(3)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR17]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
local int *private FuncVar17 = KernelArg2;
// CHECK-DAG: ![[FUNCVAR18:[0-9]+]] = !DILocalVariable(name: "FuncVar18", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32** {{.*}}, metadata ![[FUNCVAR18]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(5)* addrspace(5)* {{.*}}, metadata ![[FUNCVAR18]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
private int *private FuncVar18 = Tmp0;
// CHECK-DAG: ![[FUNCVAR19:[0-9]+]] = !DILocalVariable(name: "FuncVar19", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}})
- // CHECK-DAG: call void @llvm.dbg.declare(metadata i32 addrspace(4)** {{.*}}, metadata ![[FUNCVAR19]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
+ // CHECK-DAG: call void @llvm.dbg.declare(metadata i32* addrspace(5)* {{.*}}, metadata ![[FUNCVAR19]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}}
int *private FuncVar19 = Tmp1;
}
diff --git a/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index b2db4d782719..75fceccb2a45 100644
--- a/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -1,7 +1,11 @@
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s --check-prefix=CHECK
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -emit-llvm -o - -triple amdgcn | FileCheck %s
typedef struct {int a;} ndrange_t;
+void callee(long id, global long *out) {
+ out[id] = id;
+}
+
// CHECK-LABEL: define amdgpu_kernel void @test
kernel void test(global char *a, char b, global long *c, long d) {
queue_t default_queue;
@@ -24,22 +28,31 @@ kernel void test(global char *a, char b, global long *c, long d) {
c[0] = d;
((local int*)lp)[0] = 1;
}, 100);
+
+ void (^block)(void) = ^{
+ callee(d, c);
+ };
+
+ enqueue_kernel(default_queue, flags, ndrange, block);
}
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }>)
// CHECK-SAME: #[[ATTR:[0-9]+]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
// CHECK: entry:
-// CHECK: %1 = alloca <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, align 8
-// CHECK: store <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %1, align 8
-// CHECK: %2 = addrspacecast <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %1 to i8 addrspace(4)*
-// CHECK: call void @__test_block_invoke(i8 addrspace(4)* %2)
+// CHECK: %1 = alloca <{ i32, i32, i8 addrspace(1)*, i8 }>, align 8, addrspace(5)
+// CHECK: store <{ i32, i32, i8 addrspace(1)*, i8 }> %0, <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1, align 8
+// CHECK: %2 = addrspacecast <{ i32, i32, i8 addrspace(1)*, i8 }> addrspace(5)* %1 to i8*
+// CHECK: call void @__test_block_invoke(i8* %2)
// CHECK: ret void
// CHECK:}
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)
+// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
-// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i64, i64 addrspace(1)* }>)
// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
// CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" }
diff --git a/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl b/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
new file mode 100644
index 000000000000..bcb00be8c802
--- /dev/null
+++ b/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
+
+// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
+void foo(void) {}
diff --git a/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl b/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
deleted file mode 100644
index 4cb8d95fa098..000000000000
--- a/test/CodeGenOpenCL/amdgpu-env-amdgiz.cl
+++ /dev/null
@@ -1,9 +0,0 @@
-// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgiz -emit-llvm -o - | FileCheck -check-prefix=GIZ %s
-// RUN: %clang_cc1 %s -O0 -triple amdgcn---amdgizcl -emit-llvm -o - | FileCheck -check-prefix=GIZ %s
-
-// CHECK: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
-// GIZ: target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
-void foo(void) {}
-
diff --git a/test/CodeGenOpenCL/amdgpu-features.cl b/test/CodeGenOpenCL/amdgpu-features.cl
new file mode 100644
index 000000000000..a1815cedcd49
--- /dev/null
+++ b/test/CodeGenOpenCL/amdgpu-features.cl
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+
+// Check that appropriate features are defined for every supported AMDGPU
+// "-target" and "-mcpu" options.
+
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx904 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX904 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX906 %s
+
+// GFX904: "target-features"="+16-bit-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
+// GFX906: "target-features"="+16-bit-insts,+dl-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime"
+
+kernel void test() {}
diff --git a/test/CodeGenOpenCL/amdgpu-nullptr.cl b/test/CodeGenOpenCL/amdgpu-nullptr.cl
index 513d56c19d60..688d3a58e90b 100644
--- a/test/CodeGenOpenCL/amdgpu-nullptr.cl
+++ b/test/CodeGenOpenCL/amdgpu-nullptr.cl
@@ -21,48 +21,48 @@ typedef struct {
// Test 0 as initializer.
-// CHECK: @private_p = local_unnamed_addr addrspace(1) global i8* null, align 4
+// CHECK: @private_p = local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4
private char *private_p = 0;
-// CHECK: @local_p = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
+// CHECK: @local_p = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
local char *local_p = 0;
// CHECK: @global_p = local_unnamed_addr addrspace(1) global i8 addrspace(1)* null, align 8
global char *global_p = 0;
-// CHECK: @constant_p = local_unnamed_addr addrspace(1) global i8 addrspace(2)* null, align 8
+// CHECK: @constant_p = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
constant char *constant_p = 0;
-// CHECK: @generic_p = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
+// CHECK: @generic_p = local_unnamed_addr addrspace(1) global i8* null, align 8
generic char *generic_p = 0;
// Test NULL as initializer.
-// CHECK: @private_p_NULL = local_unnamed_addr addrspace(1) global i8* null, align 4
+// CHECK: @private_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4
private char *private_p_NULL = NULL;
-// CHECK: @local_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
+// CHECK: @local_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
local char *local_p_NULL = NULL;
// CHECK: @global_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(1)* null, align 8
global char *global_p_NULL = NULL;
-// CHECK: @constant_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(2)* null, align 8
+// CHECK: @constant_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
constant char *constant_p_NULL = NULL;
-// CHECK: @generic_p_NULL = local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
+// CHECK: @generic_p_NULL = local_unnamed_addr addrspace(1) global i8* null, align 8
generic char *generic_p_NULL = NULL;
// Test constant folding of null pointer.
// A null pointer should be folded to a null pointer in the target address space.
-// CHECK: @fold_generic = local_unnamed_addr addrspace(1) global i32 addrspace(4)* null, align 8
+// CHECK: @fold_generic = local_unnamed_addr addrspace(1) global i32* null, align 8
generic int *fold_generic = (global int*)(generic float*)(private char*)0;
-// CHECK: @fold_priv = local_unnamed_addr addrspace(1) global i16* null, align 4
+// CHECK: @fold_priv = local_unnamed_addr addrspace(1) global i16 addrspace(5)* null, align 4
private short *fold_priv = (private short*)(generic int*)(global void*)0;
-// CHECK: @fold_priv_arith = local_unnamed_addr addrspace(1) global i8* inttoptr (i32 10 to i8*), align 4
+// CHECK: @fold_priv_arith = local_unnamed_addr addrspace(1) global i8 addrspace(5)* inttoptr (i32 10 to i8 addrspace(5)*), align 4
private char *fold_priv_arith = (private char*)0 + 10;
// CHECK: @fold_int = local_unnamed_addr addrspace(1) global i32 14, align 4
@@ -99,12 +99,12 @@ int fold_int5_local = (int) &((local StructTy1*)0)->p2;
// Test static variable initialization.
-// NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.sp3 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.sp4 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.sp5 = internal addrspace(1) global i8* null, align 4
-// NOOPT: @test_static_var_private.SS1 = internal addrspace(1) global %struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }, align 8
+// NOOPT: @test_static_var_private.sp1 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp2 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp3 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp4 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.sp5 = internal addrspace(1) global i8 addrspace(5)* null, align 4
+// NOOPT: @test_static_var_private.SS1 = internal addrspace(1) global %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, align 8
// NOOPT: @test_static_var_private.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
void test_static_var_private(void) {
@@ -118,12 +118,12 @@ void test_static_var_private(void) {
static StructTy2 SS2;
}
-// NOOPT: @test_static_var_local.sp1 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
-// NOOPT: @test_static_var_local.sp2 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
-// NOOPT: @test_static_var_local.sp3 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
+// NOOPT: @test_static_var_local.sp1 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+// NOOPT: @test_static_var_local.sp2 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
+// NOOPT: @test_static_var_local.sp3 = internal addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
// NOOPT: @test_static_var_local.sp4 = internal addrspace(1) global i8 addrspace(3)* null, align 4
// NOOPT: @test_static_var_local.sp5 = internal addrspace(1) global i8 addrspace(3)* null, align 4
-// NOOPT: @test_static_var_local.SS1 = internal addrspace(1) global %struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }, align 8
+// NOOPT: @test_static_var_local.SS1 = internal addrspace(1) global %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, align 8
// NOOPT: @test_static_var_local.SS2 = internal addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
void test_static_var_local(void) {
static local char *sp1 = 0;
@@ -138,14 +138,14 @@ void test_static_var_local(void) {
// Test function-scope variable initialization.
// NOOPT-LABEL: @test_func_scope_var_private(
-// NOOPT: store i8* null, i8** %sp1, align 4
-// NOOPT: store i8* null, i8** %sp2, align 4
-// NOOPT: store i8* null, i8** %sp3, align 4
-// NOOPT: store i8* null, i8** %sp4, align 4
-// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1* %SS1 to i8*
-// NOOPT: call void @llvm.memcpy.p0i8.p2i8.i64(i8* %[[SS1]], i8 addrspace(2)* bitcast (%struct.StructTy1 addrspace(2)* @test_func_scope_var_private.SS1 to i8 addrspace(2)*), i64 32, i32 8, i1 false)
-// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2* %SS2 to i8*
-// NOOPT: call void @llvm.memset.p0i8.i64(i8* %[[SS2]], i8 0, i64 24, i32 8, i1 false)
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp1, align 4
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp2, align 4
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp3, align 4
+// NOOPT: store i8 addrspace(5)* null, i8 addrspace(5)* addrspace(5)* %sp4, align 4
+// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1 addrspace(5)* %SS1 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @test_func_scope_var_private.SS1 to i8 addrspace(4)*), i64 32, i1 false)
+// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2 addrspace(5)* %SS2 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 %[[SS2]], i8 0, i64 24, i1 false)
void test_func_scope_var_private(void) {
private char *sp1 = 0;
private char *sp2 = NULL;
@@ -158,14 +158,14 @@ void test_func_scope_var_private(void) {
// Test function-scope variable initialization.
// NOOPT-LABEL: @test_func_scope_var_local(
-// NOOPT: store i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(3)** %sp1, align 4
-// NOOPT: store i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(3)** %sp2, align 4
-// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)** %sp3, align 4
-// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)** %sp4, align 4
-// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1* %SS1 to i8*
-// NOOPT: call void @llvm.memcpy.p0i8.p2i8.i64(i8* %[[SS1]], i8 addrspace(2)* bitcast (%struct.StructTy1 addrspace(2)* @test_func_scope_var_local.SS1 to i8 addrspace(2)*), i64 32, i32 8, i1 false)
-// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2* %SS2 to i8*
-// NOOPT: call void @llvm.memset.p0i8.i64(i8* %[[SS2]], i8 0, i64 24, i32 8, i1 false)
+// NOOPT: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(5)* %sp1, align 4
+// NOOPT: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(5)* %sp2, align 4
+// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(5)* %sp3, align 4
+// NOOPT: store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(5)* %sp4, align 4
+// NOOPT: %[[SS1:.*]] = bitcast %struct.StructTy1 addrspace(5)* %SS1 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 8 %[[SS1]], i8 addrspace(4)* align 8 bitcast (%struct.StructTy1 addrspace(4)* @test_func_scope_var_local.SS1 to i8 addrspace(4)*), i64 32, i1 false)
+// NOOPT: %[[SS2:.*]] = bitcast %struct.StructTy2 addrspace(5)* %SS2 to i8 addrspace(5)*
+// NOOPT: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 %[[SS2]], i8 0, i64 24, i1 false)
void test_func_scope_var_local(void) {
local char *sp1 = 0;
local char *sp2 = NULL;
@@ -183,31 +183,31 @@ void test_func_scope_var_local(void) {
// cannot have common linkage since common linkage requires zero initialization
// and does not have explicit section.
-// CHECK: @p1 = common local_unnamed_addr addrspace(1) global i8* null, align 4
+// CHECK: @p1 = common local_unnamed_addr addrspace(1) global i8 addrspace(5)* null, align 4
private char *p1;
-// CHECK: @p2 = weak local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), align 4
+// CHECK: @p2 = weak local_unnamed_addr addrspace(1) global i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), align 4
local char *p2;
-// CHECK: @p3 = common local_unnamed_addr addrspace(1) global i8 addrspace(2)* null, align 8
+// CHECK: @p3 = common local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
constant char *p3;
// CHECK: @p4 = common local_unnamed_addr addrspace(1) global i8 addrspace(1)* null, align 8
global char *p4;
-// CHECK: @p5 = common local_unnamed_addr addrspace(1) global i8 addrspace(4)* null, align 8
+// CHECK: @p5 = common local_unnamed_addr addrspace(1) global i8* null, align 8
generic char *p5;
-// Test default initialization of sturcture.
+// Test default initialization of structure.
-// CHECK: @S1 = weak local_unnamed_addr addrspace(1) global %struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }, align 8
+// CHECK: @S1 = weak local_unnamed_addr addrspace(1) global %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, align 8
StructTy1 S1;
// CHECK: @S2 = common local_unnamed_addr addrspace(1) global %struct.StructTy2 zeroinitializer, align 8
StructTy2 S2;
// Test default initialization of array.
-// CHECK: @A1 = weak local_unnamed_addr addrspace(1) global [2 x %struct.StructTy1] [%struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }, %struct.StructTy1 { i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(2)* null, i8 addrspace(1)* null, i8 addrspace(4)* null }], align 8
+// CHECK: @A1 = weak local_unnamed_addr addrspace(1) global [2 x %struct.StructTy1] [%struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }, %struct.StructTy1 { i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(4)* null, i8 addrspace(1)* null, i8* null }], align 8
StructTy1 A1[2];
// CHECK: @A2 = common local_unnamed_addr addrspace(1) global [2 x %struct.StructTy2] zeroinitializer, align 8
@@ -216,14 +216,14 @@ StructTy2 A2[2];
// Test comparison with 0.
// CHECK-LABEL: cmp_private
-// CHECK: icmp eq i8* %p, null
+// CHECK: icmp eq i8 addrspace(5)* %p, null
void cmp_private(private char* p) {
if (p != 0)
*p = 0;
}
// CHECK-LABEL: cmp_local
-// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
void cmp_local(local char* p) {
if (p != 0)
*p = 0;
@@ -237,7 +237,7 @@ void cmp_global(global char* p) {
}
// CHECK-LABEL: cmp_constant
-// CHECK: icmp eq i8 addrspace(2)* %p, null
+// CHECK: icmp eq i8 addrspace(4)* %p, null
char cmp_constant(constant char* p) {
if (p != 0)
return *p;
@@ -246,7 +246,7 @@ char cmp_constant(constant char* p) {
}
// CHECK-LABEL: cmp_generic
-// CHECK: icmp eq i8 addrspace(4)* %p, null
+// CHECK: icmp eq i8* %p, null
void cmp_generic(generic char* p) {
if (p != 0)
*p = 0;
@@ -255,14 +255,14 @@ void cmp_generic(generic char* p) {
// Test comparison with NULL.
// CHECK-LABEL: cmp_NULL_private
-// CHECK: icmp eq i8* %p, null
+// CHECK: icmp eq i8 addrspace(5)* %p, null
void cmp_NULL_private(private char* p) {
if (p != NULL)
*p = 0;
}
// CHECK-LABEL: cmp_NULL_local
-// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
void cmp_NULL_local(local char* p) {
if (p != NULL)
*p = 0;
@@ -276,7 +276,7 @@ void cmp_NULL_global(global char* p) {
}
// CHECK-LABEL: cmp_NULL_constant
-// CHECK: icmp eq i8 addrspace(2)* %p, null
+// CHECK: icmp eq i8 addrspace(4)* %p, null
char cmp_NULL_constant(constant char* p) {
if (p != NULL)
return *p;
@@ -285,7 +285,7 @@ char cmp_NULL_constant(constant char* p) {
}
// CHECK-LABEL: cmp_NULL_generic
-// CHECK: icmp eq i8 addrspace(4)* %p, null
+// CHECK: icmp eq i8* %p, null
void cmp_NULL_generic(generic char* p) {
if (p != NULL)
*p = 0;
@@ -293,11 +293,11 @@ void cmp_NULL_generic(generic char* p) {
// Test storage 0 as null pointer.
// CHECK-LABEL: test_storage_null_pointer
-// CHECK: store i8* null, i8* addrspace(4)* %arg_private
-// CHECK: store i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(4)* %arg_local
-// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(4)* %arg_global
-// CHECK: store i8 addrspace(2)* null, i8 addrspace(2)* addrspace(4)* %arg_constant
-// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)* addrspace(4)* %arg_generic
+// CHECK: store i8 addrspace(5)* null, i8 addrspace(5)** %arg_private
+// CHECK: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)** %arg_local
+// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)** %arg_global
+// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)** %arg_constant
+// CHECK: store i8* null, i8** %arg_generic
void test_storage_null_pointer(private char** arg_private,
local char** arg_local,
global char** arg_global,
@@ -312,11 +312,11 @@ void test_storage_null_pointer(private char** arg_private,
// Test storage NULL as null pointer.
// CHECK-LABEL: test_storage_null_pointer_NULL
-// CHECK: store i8* null, i8* addrspace(4)* %arg_private
-// CHECK: store i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(3)* addrspace(4)* %arg_local
-// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(4)* %arg_global
-// CHECK: store i8 addrspace(2)* null, i8 addrspace(2)* addrspace(4)* %arg_constant
-// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)* addrspace(4)* %arg_generic
+// CHECK: store i8 addrspace(5)* null, i8 addrspace(5)** %arg_private
+// CHECK: store i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(3)** %arg_local
+// CHECK: store i8 addrspace(1)* null, i8 addrspace(1)** %arg_global
+// CHECK: store i8 addrspace(4)* null, i8 addrspace(4)** %arg_constant
+// CHECK: store i8* null, i8** %arg_generic
void test_storage_null_pointer_NULL(private char** arg_private,
local char** arg_local,
global char** arg_global,
@@ -337,8 +337,8 @@ void test_pass_null_pointer_arg_calee(private char* arg_private,
generic char* arg_generic);
// CHECK-LABEL: test_pass_null_pointer_arg
-// CHECK: call void @test_pass_null_pointer_arg_calee(i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(2)* null, i8 addrspace(4)* null)
-// CHECK: call void @test_pass_null_pointer_arg_calee(i8* null, i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(2)* null, i8 addrspace(4)* null)
+// CHECK: call void @test_pass_null_pointer_arg_calee(i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(4)* null, i8* null)
+// CHECK: call void @test_pass_null_pointer_arg_calee(i8 addrspace(5)* null, i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*), i8 addrspace(1)* null, i8 addrspace(4)* null, i8* null)
void test_pass_null_pointer_arg(void) {
test_pass_null_pointer_arg_calee(0, 0, 0, 0, 0);
test_pass_null_pointer_arg_calee(NULL, NULL, NULL, NULL, NULL);
@@ -352,8 +352,8 @@ void test_cast_null_pointer_to_sizet_calee(size_t arg_private,
size_t arg_generic);
// CHECK-LABEL: test_cast_null_pointer_to_sizet
-// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
-// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
+// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
+// CHECK: call void @test_cast_null_pointer_to_sizet_calee(i64 0, i64 ptrtoint (i8 addrspace(3)* addrspacecast (i8* null to i8 addrspace(3)*) to i64), i64 0, i64 0, i64 0)
void test_cast_null_pointer_to_sizet(void) {
test_cast_null_pointer_to_sizet_calee((size_t)((private char*)0),
(size_t)((local char*)0),
@@ -367,7 +367,7 @@ void test_cast_null_pointer_to_sizet(void) {
(size_t)((generic char*)NULL));
}
-// Test comparision between null pointers.
+// Test comparison between null pointers.
#define TEST_EQ00(addr1, addr2) int test_eq00_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)0; }
#define TEST_EQ0N(addr1, addr2) int test_eq0N_##addr1##_##addr2(void) { return (addr1 char*)0 == (addr2 char*)NULL; }
#define TEST_EQN0(addr1, addr2) int test_eqN0_##addr1##_##addr2(void) { return (addr1 char*)NULL == (addr2 char*)0; }
@@ -465,14 +465,14 @@ TEST_EQ00(constant, constant)
// Test cast to bool.
// CHECK-LABEL: cast_bool_private
-// CHECK: icmp eq i8* %p, null
+// CHECK: icmp eq i8 addrspace(5)* %p, null
void cast_bool_private(private char* p) {
if (p)
*p = 0;
}
// CHECK-LABEL: cast_bool_local
-// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
void cast_bool_local(local char* p) {
if (p)
*p = 0;
@@ -486,7 +486,7 @@ void cast_bool_global(global char* p) {
}
// CHECK-LABEL: cast_bool_constant
-// CHECK: icmp eq i8 addrspace(2)* %p, null
+// CHECK: icmp eq i8 addrspace(4)* %p, null
char cast_bool_constant(constant char* p) {
if (p)
return *p;
@@ -495,7 +495,7 @@ char cast_bool_constant(constant char* p) {
}
// CHECK-LABEL: cast_bool_generic
-// CHECK: icmp eq i8 addrspace(4)* %p, null
+// CHECK: icmp eq i8* %p, null
void cast_bool_generic(generic char* p) {
if (p)
*p = 0;
@@ -510,7 +510,7 @@ typedef struct {
} StructTy3;
// CHECK-LABEL: test_memset_private
-// CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull {{.*}}, i8 0, i64 40, i32 8, i1 false)
+// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 8 {{.*}}, i8 0, i64 40, i1 false)
void test_memset_private(private StructTy3 *ptr) {
StructTy3 S3 = {0, 0, 0, 0, 0};
*ptr = S3;
@@ -520,13 +520,13 @@ void test_memset_private(private StructTy3 *ptr) {
// A 0 literal casted to pointer should become a null pointer.
// CHECK-LABEL: test_cast_0_to_local_ptr
-// CHECK: ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+// CHECK: ret i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
local int* test_cast_0_to_local_ptr(void) {
return (local int*)0;
}
// CHECK-LABEL: test_cast_0_to_private_ptr
-// CHECK: ret i32* null
+// CHECK: ret i32 addrspace(5)* null
private int* test_cast_0_to_private_ptr(void) {
return (private int*)0;
}
@@ -536,7 +536,7 @@ private int* test_cast_0_to_private_ptr(void) {
// zero value.
// CHECK-LABEL: test_cast_int_to_ptr1_private
-// CHECK: ret i32* null
+// CHECK: ret i32 addrspace(5)* null
private int* test_cast_int_to_ptr1_private(void) {
return (private int*)((void)0, 0);
}
@@ -548,7 +548,7 @@ local int* test_cast_int_to_ptr1_local(void) {
}
// CHECK-LABEL: test_cast_int_to_ptr2
-// CHECK: ret i32* null
+// CHECK: ret i32 addrspace(5)* null
private int* test_cast_int_to_ptr2(void) {
int x = 0;
return (private int*)x;
@@ -568,7 +568,7 @@ int test_and_nullptr(int a) {
}
// CHECK-LABEL: test_not_private_ptr
-// CHECK: %[[lnot:.*]] = icmp eq i8* %p, null
+// CHECK: %[[lnot:.*]] = icmp eq i8 addrspace(5)* %p, null
// CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
// CHECK: ret i32 %[[lnot_ext]]
int test_not_private_ptr(private char* p) {
@@ -576,7 +576,7 @@ int test_not_private_ptr(private char* p) {
}
// CHECK-LABEL: test_not_local_ptr
-// CHECK: %[[lnot:.*]] = icmp eq i8 addrspace(3)* %p, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: %[[lnot:.*]] = icmp eq i8 addrspace(3)* %p, addrspacecast (i8* null to i8 addrspace(3)*)
// CHECK: %[[lnot_ext:.*]] = zext i1 %[[lnot]] to i32
// CHECK: ret i32 %[[lnot_ext]]
int test_not_local_ptr(local char* p) {
@@ -585,8 +585,8 @@ int test_not_local_ptr(local char* p) {
// CHECK-LABEL: test_and_ptr
-// CHECK: %[[tobool:.*]] = icmp ne i8* %p1, null
-// CHECK: %[[tobool1:.*]] = icmp ne i8 addrspace(3)* %p2, addrspacecast (i8 addrspace(4)* null to i8 addrspace(3)*)
+// CHECK: %[[tobool:.*]] = icmp ne i8 addrspace(5)* %p1, null
+// CHECK: %[[tobool1:.*]] = icmp ne i8 addrspace(3)* %p2, addrspacecast (i8* null to i8 addrspace(3)*)
// CHECK: %[[res:.*]] = and i1 %[[tobool]], %[[tobool1]]
// CHECK: %[[land_ext:.*]] = zext i1 %[[res]] to i32
// CHECK: ret i32 %[[land_ext]]
@@ -597,7 +597,7 @@ int test_and_ptr(private char* p1, local char* p2) {
// Test folding of null pointer in function scope.
// NOOPT-LABEL: test_fold_private
// NOOPT: call void @test_fold_callee
-// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)** %glob, align 8
+// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)* addrspace(5)* %glob, align 8
// NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
// NOOPT: call void @test_fold_callee
// NOOPT: %{{.*}} = add nsw i64 %{{.*}}, 0
@@ -612,10 +612,10 @@ void test_fold_private(void) {
// NOOPT-LABEL: test_fold_local
// NOOPT: call void @test_fold_callee
-// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)** %glob, align 8
+// NOOPT: store i32 addrspace(1)* null, i32 addrspace(1)* addrspace(5)* %glob, align 8
// NOOPT: %{{.*}} = sub i64 %{{.*}}, 0
// NOOPT: call void @test_fold_callee
-// NOOPT: %{{.*}} = add nsw i64 %{{.*}}, sext (i32 ptrtoint (i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) to i32) to i64)
+// NOOPT: %{{.*}} = add nsw i64 %{{.*}}, sext (i32 ptrtoint (i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*) to i32) to i64)
// NOOPT: %{{.*}} = sub nsw i64 %{{.*}}, 1
void test_fold_local(void) {
global int* glob = (test_fold_callee(), (global int*)(generic char*)0);
diff --git a/test/CodeGenOpenCL/blocks.cl b/test/CodeGenOpenCL/blocks.cl
index 146d9dc113a8..0e125fd703f5 100644
--- a/test/CodeGenOpenCL/blocks.cl
+++ b/test/CodeGenOpenCL/blocks.cl
@@ -1,12 +1,11 @@
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck -check-prefixes=COMMON,SPIR %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa-opencl | FileCheck -check-prefixes=COMMON,AMD %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,AMDGCN %s
-// COMMON: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
-// SPIR: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) to i8 addrspace(4)*) }
-// AMD: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) to i8 addrspace(4)*) }
+// COMMON: @__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }
// COMMON-NOT: .str
-// COMMON-LABEL: define internal {{.*}}void @block_A_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(3)* %a)
+// SPIR-LABEL: define internal {{.*}}void @block_A_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(3)* %a)
+// AMDGCN-LABEL: define internal {{.*}}void @block_A_block_invoke(i8* %.block_descriptor, i8 addrspace(3)* %a)
void (^block_A)(local void *) = ^(local void *a) {
return;
};
@@ -18,27 +17,32 @@ void foo(){
// COMMON-NOT: %block.flags
// COMMON-NOT: %block.reserved
// COMMON-NOT: %block.descriptor
- // COMMON: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 0
- // SPIR: store i32 16, i32* %[[block_size]]
- // AMD: store i32 20, i32* %[[block_size]]
- // COMMON: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 1
+ // SPIR: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block:.*]], i32 0, i32 0
+ // AMDGCN: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block:.*]], i32 0, i32 0
+ // SPIR: store i32 12, i32* %[[block_size]]
+ // AMDGCN: store i32 12, i32 addrspace(5)* %[[block_size]]
+ // SPIR: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 1
+ // AMDGCN: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 1
// SPIR: store i32 4, i32* %[[block_align]]
- // AMD: store i32 8, i32* %[[block_align]]
- // COMMON: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block:.*]], i32 0, i32 2
- // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %[[block_invoke]]
- // COMMON: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
- // COMMON: %[[i_value:.*]] = load i32, i32* %i
- // COMMON: store i32 %[[i_value]], i32* %[[block_captured]],
- // COMMON: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
- // COMMON: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
- // COMMON: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
- // COMMON: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
- // COMMON: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
- // COMMON: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
- // COMMON: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
- // COMMON: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
- // COMMON: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
- // COMMON: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+ // AMDGCN: store i32 4, i32 addrspace(5)* %[[block_align]]
+ // SPIR: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2
+ // SPIR: %[[i_value:.*]] = load i32, i32* %i
+ // SPIR: store i32 %[[i_value]], i32* %[[block_captured]],
+ // SPIR: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }>* %[[block]] to i32 ()*
+ // SPIR: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
+ // SPIR: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
+ // SPIR: %[[block_literal:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
+ // SPIR: %[[blk_gen_ptr:.*]] = bitcast i32 () addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
+ // SPIR: call {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %[[blk_gen_ptr]])
+ // AMDGCN: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(5)* %[[block]], i32 0, i32 2
+ // AMDGCN: %[[i_value:.*]] = load i32, i32 addrspace(5)* %i
+ // AMDGCN: store i32 %[[i_value]], i32 addrspace(5)* %[[block_captured]],
+ // AMDGCN: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i32 }> addrspace(5)* %[[block]] to i32 () addrspace(5)*
+ // AMDGCN: %[[blk_gen_ptr:.*]] = addrspacecast i32 () addrspace(5)* %[[blk_ptr]] to i32 ()*
+ // AMDGCN: store i32 ()* %[[blk_gen_ptr]], i32 ()* addrspace(5)* %[[block_B:.*]],
+ // AMDGCN: %[[block_literal:.*]] = load i32 ()*, i32 ()* addrspace(5)* %[[block_B]]
+ // AMDGCN: %[[blk_gen_ptr:.*]] = bitcast i32 ()* %[[block_literal]] to i8*
+ // AMDGCN: call {{.*}}i32 @__foo_block_invoke(i8* %[[blk_gen_ptr]])
int (^ block_B)(void) = ^{
return i;
@@ -46,9 +50,13 @@ void foo(){
block_B();
}
-// COMMON-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor)
-// COMMON: %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)*
-// COMMON: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)* %[[block]], i32 0, i32 3
-// COMMON: %[[block_capture:.*]] = load i32, i32 addrspace(4)* %[[block_capture_addr]]
+// SPIR-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor)
+// SPIR: %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 }> addrspace(4)*
+// SPIR: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }> addrspace(4)* %[[block]], i32 0, i32 2
+// SPIR: %[[block_capture:.*]] = load i32, i32 addrspace(4)* %[[block_capture_addr]]
+// AMDGCN-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8* %.block_descriptor)
+// AMDGCN: %[[block:.*]] = bitcast i8* %.block_descriptor to <{ i32, i32, i32 }>*
+// AMDGCN: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i32 }>, <{ i32, i32, i32 }>* %[[block]], i32 0, i32 2
+// AMDGCN: %[[block_capture:.*]] = load i32, i32* %[[block_capture_addr]]
// COMMON-NOT: define{{.*}}@__foo_block_invoke_kernel
diff --git a/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
new file mode 100644
index 000000000000..bfc811e29556
--- /dev/null
+++ b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl
@@ -0,0 +1,25 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -verify -S -emit-llvm -o - %s
+
+typedef unsigned int uint;
+typedef half __attribute__((ext_vector_type(2))) half2;
+typedef short __attribute__((ext_vector_type(2))) short2;
+typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;
+
+kernel void builtins_amdgcn_dl_insts_err(
+ global float *fOut, global int *siOut, global uint *uiOut,
+ half2 v2hA, half2 v2hB, float fC,
+ short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
+ ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
+ fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dl-insts}}
+
+ siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dl-insts}}
+ uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dl-insts}}
+
+ siOut[1] = __builtin_amdgcn_sdot4(siA, siB, siC); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dl-insts}}
+ uiOut[1] = __builtin_amdgcn_udot4(uiA, uiB, uiC); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dl-insts}}
+
+ siOut[2] = __builtin_amdgcn_sdot8(siA, siB, siC); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dl-insts}}
+ uiOut[2] = __builtin_amdgcn_udot8(uiA, uiB, uiC); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dl-insts}}
+}
diff --git a/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
new file mode 100644
index 000000000000..c407bd6fe155
--- /dev/null
+++ b/test/CodeGenOpenCL/builtins-amdgcn-dl-insts.cl
@@ -0,0 +1,36 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -S -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned int uint;
+typedef half __attribute__((ext_vector_type(2))) half2;
+typedef short __attribute__((ext_vector_type(2))) short2;
+typedef unsigned short __attribute__((ext_vector_type(2))) ushort2;
+
+// CHECK-LABEL: @builtins_amdgcn_dl_insts
+// CHECK: call float @llvm.amdgcn.fdot2
+
+// CHECK: call i32 @llvm.amdgcn.sdot2
+// CHECK: call i32 @llvm.amdgcn.udot2
+
+// CHECK: call i32 @llvm.amdgcn.sdot4
+// CHECK: call i32 @llvm.amdgcn.udot4
+
+// CHECK: call i32 @llvm.amdgcn.sdot8
+// CHECK: call i32 @llvm.amdgcn.udot8
+kernel void builtins_amdgcn_dl_insts(
+ global float *fOut, global int *siOut, global uint *uiOut,
+ half2 v2hA, half2 v2hB, float fC,
+ short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
+ ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
+ fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC);
+
+ siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC);
+ uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC);
+
+ siOut[1] = __builtin_amdgcn_sdot4(siA, siB, siC);
+ uiOut[1] = __builtin_amdgcn_udot4(uiA, uiB, uiC);
+
+ siOut[2] = __builtin_amdgcn_sdot8(siA, siB, siC);
+ uiOut[2] = __builtin_amdgcn_udot8(uiA, uiB, uiC);
+}
diff --git a/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 1dad67491832..afa312cfcb12 100644
--- a/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -89,3 +89,20 @@ void test_mov_dpp(global int* out, int src)
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
}
+// CHECK-LABEL: @test_ds_fadd
+// CHECK: call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_faddf(local float *out, float src) {
+ *out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmin
+// CHECK: call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fminf(local float *out, float src) {
+ *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
+}
+
+// CHECK-LABEL: @test_ds_fmax
+// CHECK: call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %out, float %src, i32 0, i32 0, i1 false)
+void test_ds_fmaxf(local float *out, float src) {
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
+}
diff --git a/test/CodeGenOpenCL/builtins-amdgcn.cl b/test/CodeGenOpenCL/builtins-amdgcn.cl
index 9f036547bf41..2015f36e93dc 100644
--- a/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -434,22 +434,22 @@ void test_read_exec_hi(global uint* out) {
}
// CHECK-LABEL: @test_dispatch_ptr
-// CHECK: call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
-void test_dispatch_ptr(__attribute__((address_space(2))) unsigned char ** out)
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
+void test_dispatch_ptr(__attribute__((address_space(4))) unsigned char ** out)
{
*out = __builtin_amdgcn_dispatch_ptr();
}
// CHECK-LABEL: @test_kernarg_segment_ptr
-// CHECK: call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr()
-void test_kernarg_segment_ptr(__attribute__((address_space(2))) unsigned char ** out)
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+void test_kernarg_segment_ptr(__attribute__((address_space(4))) unsigned char ** out)
{
*out = __builtin_amdgcn_kernarg_segment_ptr();
}
// CHECK-LABEL: @test_implicitarg_ptr
-// CHECK: call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
-void test_implicitarg_ptr(__attribute__((address_space(2))) unsigned char ** out)
+// CHECK: call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+void test_implicitarg_ptr(__attribute__((address_space(4))) unsigned char ** out)
{
*out = __builtin_amdgcn_implicitarg_ptr();
}
diff --git a/test/CodeGenOpenCL/byval.cl b/test/CodeGenOpenCL/byval.cl
index 90afdfab5256..05c72c514ca2 100644
--- a/test/CodeGenOpenCL/byval.cl
+++ b/test/CodeGenOpenCL/byval.cl
@@ -1,6 +1,4 @@
// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn---opencl %s | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn---amdgizcl %s | FileCheck %s -check-prefix=AMDGIZ
struct A {
int x[100];
@@ -10,10 +8,8 @@ int f(struct A a);
int g() {
struct A a;
- // CHECK: call i32 @f(%struct.A* byval{{.*}}%a)
- // AMDGIZ: call i32 @f(%struct.A addrspace(5)* byval{{.*}}%a)
+ // CHECK: call i32 @f(%struct.A addrspace(5)* byval{{.*}}%a)
return f(a);
}
-// CHECK: declare i32 @f(%struct.A* byval{{.*}})
-// AMDGIZ: declare i32 @f(%struct.A addrspace(5)* byval{{.*}})
+// CHECK: declare i32 @f(%struct.A addrspace(5)* byval{{.*}})
diff --git a/test/CodeGenOpenCL/cast_image.cl b/test/CodeGenOpenCL/cast_image.cl
index 479404a91371..d4e24b4f051b 100644
--- a/test/CodeGenOpenCL/cast_image.cl
+++ b/test/CodeGenOpenCL/cast_image.cl
@@ -4,7 +4,7 @@
#ifdef __AMDGCN__
constant int* convert(image2d_t img) {
- // AMDGCN: bitcast %opencl.image2d_ro_t addrspace(2)* %img to i32 addrspace(2)*
+ // AMDGCN: bitcast %opencl.image2d_ro_t addrspace(4)* %img to i32 addrspace(4)*
return __builtin_astype(img, constant int*);
}
diff --git a/test/CodeGenOpenCL/cl-uniform-wg-size.cl b/test/CodeGenOpenCL/cl-uniform-wg-size.cl
new file mode 100644
index 000000000000..76ace5dca21e
--- /dev/null
+++ b/test/CodeGenOpenCL/cl-uniform-wg-size.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
+// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 | FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
+
+kernel void ker() {};
+// CHECK: define{{.*}}@ker() #0
+
+void foo() {};
+// CHECK: define{{.*}}@foo() #1
+
+// CHECK-LABEL: attributes #0
+// CHECK-UNIFORM: "uniform-work-group-size"="true"
+// CHECK-NONUNIFORM: "uniform-work-group-size"="false"
+
+// CHECK-LABEL: attributes #1
+// CHECK-NOT: uniform-work-group-size
diff --git a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index 0bf87c25bd83..d74a1dfbd484 100644
--- a/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -6,29 +6,31 @@
typedef void (^bl_t)(local void *);
typedef struct {int a;} ndrange_t;
-// COMMON: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
-
// For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
-// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INV_G:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
+// COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
// For anonymous blocks without captures, emit block literals as global variable.
-// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVG8:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* [[INVG9:@[^ ]+]] to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
-// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* {{@[^ ]+}} to i8*) to i8 addrspace(4)*) }
+// COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
+// COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32 } { i32 {{[0-9]+}}, i32 {{[0-9]+}} }
// Emits block literal [[BL_GLOBAL]], invoke function [[INV_G]] and global block variable @block_G
-// COMMON: define internal spir_func void [[INV_G]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}})
+// COMMON: define internal spir_func void [[INV_G:.*]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}})
const bl_t block_G = (bl_t) ^ (local void *a) {};
+void callee(int id, __global int *out) {
+ out[id] = id;
+}
+
// COMMON-LABEL: define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: %default_queue = alloca %opencl.queue_t*
@@ -48,9 +50,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
- // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
- // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
// COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
@@ -66,8 +67,7 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
- // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
// COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
// COMMON-LABEL: call i32 @__enqueue_kernel_basic_events
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
@@ -88,10 +88,10 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 256, i64* %[[TMP1]], align 8
- // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs(
+ // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK1:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG1]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
// B32-SAME: i32* %[[TMP1]])
// B64-SAME: i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
@@ -109,10 +109,10 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
- // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs(
+ // COMMON-LABEL: call i32 @__enqueue_kernel_varargs(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK2:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG2]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
// B32-SAME: i32* %[[TMP1]])
// B64-SAME: i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
@@ -133,10 +133,10 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 256, i64* %[[TMP1]], align 8
- // COMMON-LABEL: call i32 @__enqueue_kernel_events_vaargs
+ // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]],
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG3]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
// B32-SAME: i32* %[[TMP1]])
// B64-SAME: i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
@@ -157,10 +157,10 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
- // COMMON-LABEL: call i32 @__enqueue_kernel_events_vaargs
+ // COMMON-LABEL: call i32 @__enqueue_kernel_events_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]],
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK4:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG4]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
// B32-SAME: i32* %[[TMP1]])
// B64-SAME: i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
@@ -179,10 +179,10 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
- // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs
+ // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK5:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG5]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
// B32-SAME: i32* %[[TMP1]])
// B64-SAME: i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
@@ -208,10 +208,10 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// B64: store i64 2, i64* %[[TMP2]], align 8
// B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 2
// B64: store i64 4, i64* %[[TMP3]], align 8
- // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs
+ // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK6:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3,
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG6]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3,
// B32-SAME: i32* %[[TMP1]])
// B64-SAME: i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
@@ -229,10 +229,10 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 4294967296, i64* %[[TMP1]], align 8
- // COMMON-LABEL: call i32 @__enqueue_kernel_vaargs
+ // COMMON-LABEL: call i32 @__enqueue_kernel_varargs
// COMMON-SAME: (%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK7:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG7]] to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1,
// B32-SAME: i32* %[[TMP1]])
// B64-SAME: i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
@@ -244,21 +244,19 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// Emits global block literal [[BLG8]] and invoke function [[INVG8]].
// The full type of these expressions are long (and repeated elsewhere), so we
// capture it as part of the regex for convenience and clarity.
- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
+ // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
void (^const block_A)(void) = ^{
return;
};
// Emits global block literal [[BLG9]] and invoke function [[INVG9]].
- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
+ // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
void (^const block_B)(local void *) = ^(local void *a) {
return;
};
// Uses global block literal [[BLG8]] and invoke function [[INVG8]].
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void [[INVG8:.*]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
// Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
@@ -267,49 +265,61 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON-LABEL: call i32 @__enqueue_kernel_basic(
// COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
enqueue_kernel(default_queue, flags, ndrange, block_A);
// Uses block kernel [[INVGK8]] and global block literal [[BLG8]].
// COMMON: call i32 @__get_kernel_work_group_size_impl(
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
unsigned size = get_kernel_work_group_size(block_A);
// Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
- // COMMON: [[r1:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* getelementptr inbounds (%struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), i32 0, i32 2)
- // COMMON: [[r2:%.*]] = addrspacecast i8 addrspace(4)* [[r1]] to void (i8 addrspace(4)*)*
- // COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call spir_func void [[INVG8]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
- // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]].
+ void (^block_C)(void) = ^{
+ callee(i, a);
+ };
+
+ // Emits block literal on stack and block kernel [[INVLK3]].
+ // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+ // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+ // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+ // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+ // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
+ enqueue_kernel(default_queue, flags, ndrange, block_C);
+
+ // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INVG9]].
// COMMON: call i32 @__get_kernel_work_group_size_impl(
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG9]] to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_work_group_size(block_B);
// Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal ind invoke functions are emitted.
- // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(
+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK8]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_preferred_work_group_size_multiple(block_A);
// Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
- // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(
+ // COMMON: call i32 @__get_kernel_preferred_work_group_size_multiple_impl(
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INV_G_K:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_preferred_work_group_size_multiple(block_G);
// Emits global block literal [[BLG10]] and block kernel [[INVGK10]].
// COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK10:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG10]] to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
// Emits global block literal [[BLG11]] and block kernel [[INVGK11]].
// COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{[^,]+}},
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK11:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
- // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* [[BLG11]] to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
}
@@ -331,8 +341,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: }
// COMMON: define internal spir_kernel void [[INVGK7]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
-// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
+// COMMON: define internal spir_func void [[INVG9:.*]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
// COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})
diff --git a/test/CodeGenOpenCL/convergent.cl b/test/CodeGenOpenCL/convergent.cl
index 285b637ca687..a011920761f3 100644
--- a/test/CodeGenOpenCL/convergent.cl
+++ b/test/CodeGenOpenCL/convergent.cl
@@ -127,7 +127,7 @@ void test_not_unroll() {
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
// CHECK-LABEL: @assume_convergent_asm
-// CHECK: tail call void asm sideeffect "s_barrier", ""() #4
+// CHECK: tail call void asm sideeffect "s_barrier", ""() #5
kernel void assume_convergent_asm()
{
__asm__ volatile("s_barrier");
@@ -138,4 +138,5 @@ kernel void assume_convergent_asm()
// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #6 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
diff --git a/test/CodeGenOpenCL/denorms-are-zero.cl b/test/CodeGenOpenCL/denorms-are-zero.cl
index ca7969ad5f40..ab8bf7641c87 100644
--- a/test/CodeGenOpenCL/denorms-are-zero.cl
+++ b/test/CodeGenOpenCL/denorms-are-zero.cl
@@ -1,19 +1,25 @@
-// RUN: %clang_cc1 -S -cl-denorms-are-zero -o - %s 2>&1
-// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=CHECK-DENORM
-// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck --check-prefix=CHECK-FEATURE %s
+// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - %s | FileCheck %s --check-prefix=DENORM-ZERO
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN-DENORM
+// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck --check-prefix=AMDGCN-FEATURE %s
-// For non-amdgcn targets, this test just checks that the -cl-denorms-are-zero argument is accepted
-// by clang. This option is currently a no-op, which is allowed by the
-// OpenCL specification.
+// For all targets 'denorms-are-zero' attribute is set to 'true'
+// if '-cl-denorms-are-zero' was specified and to 'false' otherwise.
+
+// CHECK-LABEL: define {{(dso_local )?}}void @f()
+// CHECK: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false"
+//
+// DENORM-ZERO-LABEL: define {{(dso_local )?}}void @f()
+// DENORM-ZERO: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true"
// For amdgcn target cpu fiji, fp32 should be flushed since fiji does not support fp32 denormals, unless +fp32-denormals is
// explicitly set. amdgcn target always do not flush fp64 denormals. The control for fp64 and fp16 denormals is the same.
-// CHECK-DENORM-LABEL: define void @f()
-// CHECK-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
-// CHECK-LABEL: define void @f()
-// CHECK: attributes #{{[0-9]*}} = {{{[^}]*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
-// CHECK-FEATURE-LABEL: define void @f()
-// CHECK-FEATURE: attributes #{{[0-9]*}} = {{{[^}]*}} "target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}-fp64-fp16-denormals{{[^"]*}}"
+// AMDGCN-LABEL: define void @f()
+// AMDGCN: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
+// AMDGCN-DENORM-LABEL: define void @f()
+// AMDGCN-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
+// AMDGCN-FEATURE-LABEL: define void @f()
+// AMDGCN-FEATURE: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}-fp64-fp16-denormals{{[^"]*}}"
void f() {}
diff --git a/test/CodeGenOpenCL/half.cl b/test/CodeGenOpenCL/half.cl
index a10ba4d7f9e9..eae8cdcb03ec 100644
--- a/test/CodeGenOpenCL/half.cl
+++ b/test/CodeGenOpenCL/half.cl
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple x86_64-pc-win32 | FileCheck %s
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
diff --git a/test/CodeGenOpenCL/inline-asm-amdgcn.cl b/test/CodeGenOpenCL/inline-asm-amdgcn.cl
new file mode 100644
index 000000000000..ccd98210d3a3
--- /dev/null
+++ b/test/CodeGenOpenCL/inline-asm-amdgcn.cl
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s
+
+kernel void test_long(int arg0) {
+ long v15_16;
+ // CHECK: tail call i64 asm sideeffect "v_lshlrev_b64 v[15:16], 0, $0", "={v[15:16]},v"(i32 %arg0)
+ __asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(arg0));
+}
diff --git a/test/CodeGenOpenCL/kernel-attributes.cl b/test/CodeGenOpenCL/kernel-attributes.cl
index eff2e57d8c9b..c9ecb144c9f3 100644
--- a/test/CodeGenOpenCL/kernel-attributes.cl
+++ b/test/CodeGenOpenCL/kernel-attributes.cl
@@ -3,13 +3,13 @@
typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
kernel __attribute__((vec_type_hint(int))) __attribute__((reqd_work_group_size(1,2,4))) void kernel1(int a) {}
-// CHECK: define spir_kernel void @kernel1(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD1:[0-9]+]] !reqd_work_group_size ![[MD2:[0-9]+]]
+// CHECK: define {{(dso_local )?}}spir_kernel void @kernel1(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD1:[0-9]+]] !reqd_work_group_size ![[MD2:[0-9]+]]
kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {}
-// CHECK: define spir_kernel void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
+// CHECK: define {{(dso_local )?}}spir_kernel void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {}
-// CHECK: define spir_kernel void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
+// CHECK: define {{(dso_local )?}}spir_kernel void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
// CHECK: [[MD1]] = !{i32 undef, i32 1}
// CHECK: [[MD2]] = !{i32 1, i32 2, i32 4}
diff --git a/test/CodeGenOpenCL/kernel-metadata.cl b/test/CodeGenOpenCL/kernel-metadata.cl
index 95be43015aa9..cdec97b7df28 100644
--- a/test/CodeGenOpenCL/kernel-metadata.cl
+++ b/test/CodeGenOpenCL/kernel-metadata.cl
@@ -6,5 +6,5 @@ void normal_function() {
__kernel void kernel_function() {
}
-// CHECK: define spir_kernel void @kernel_function() {{[^{]+}} !kernel_arg_addr_space ![[MD:[0-9]+]] !kernel_arg_access_qual ![[MD]] !kernel_arg_type ![[MD]] !kernel_arg_base_type ![[MD]] !kernel_arg_type_qual ![[MD]] {
+// CHECK: define {{.*}}spir_kernel void @kernel_function() {{[^{]+}} !kernel_arg_addr_space ![[MD:[0-9]+]] !kernel_arg_access_qual ![[MD]] !kernel_arg_type ![[MD]] !kernel_arg_base_type ![[MD]] !kernel_arg_type_qual ![[MD]] {
// CHECK: ![[MD]] = !{}
diff --git a/test/CodeGenOpenCL/lifetime.cl b/test/CodeGenOpenCL/lifetime.cl
index 430e0582aeda..ed9f32fde933 100644
--- a/test/CodeGenOpenCL/lifetime.cl
+++ b/test/CodeGenOpenCL/lifetime.cl
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn---amdgizcl %s | FileCheck %s -check-prefix=AMDGIZ
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn %s | FileCheck %s -check-prefix=AMDGCN
void use(char *a);
@@ -10,6 +10,6 @@ __attribute__((always_inline)) void helper_no_markers() {
void lifetime_test() {
// CHECK: @llvm.lifetime.start.p0i
-// AMDGIZ: @llvm.lifetime.start.p5i
+// AMDGCN: @llvm.lifetime.start.p5i
helper_no_markers();
}
diff --git a/test/CodeGenOpenCL/opencl_types.cl b/test/CodeGenOpenCL/opencl_types.cl
index 3501f9fd34e9..1f1294bd99aa 100644
--- a/test/CodeGenOpenCL/opencl_types.cl
+++ b/test/CodeGenOpenCL/opencl_types.cl
@@ -11,37 +11,39 @@ constant sampler_t glb_smp = CLK_ADDRESS_CLAMP_TO_EDGE|CLK_NORMALIZED_COORDS_TRU
void fnc1(image1d_t img) {}
// CHECK-SPIR: @fnc1(%opencl.image1d_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc1(%opencl.image1d_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc1(%opencl.image1d_ro_t addrspace(4)*
void fnc1arr(image1d_array_t img) {}
// CHECK-SPIR: @fnc1arr(%opencl.image1d_array_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc1arr(%opencl.image1d_array_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc1arr(%opencl.image1d_array_ro_t addrspace(4)*
void fnc1buff(image1d_buffer_t img) {}
// CHECK-SPIR: @fnc1buff(%opencl.image1d_buffer_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc1buff(%opencl.image1d_buffer_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc1buff(%opencl.image1d_buffer_ro_t addrspace(4)*
void fnc2(image2d_t img) {}
// CHECK-SPIR: @fnc2(%opencl.image2d_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc2(%opencl.image2d_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc2(%opencl.image2d_ro_t addrspace(4)*
void fnc2arr(image2d_array_t img) {}
// CHECK-SPIR: @fnc2arr(%opencl.image2d_array_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc2arr(%opencl.image2d_array_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc2arr(%opencl.image2d_array_ro_t addrspace(4)*
void fnc3(image3d_t img) {}
// CHECK-SPIR: @fnc3(%opencl.image3d_ro_t addrspace(1)*
-// CHECK-AMDGCN: @fnc3(%opencl.image3d_ro_t addrspace(2)*
+// CHECK-AMDGCN: @fnc3(%opencl.image3d_ro_t addrspace(4)*
void fnc4smp(sampler_t s) {}
// CHECK-SPIR-LABEL: define {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
-// CHECK-AMDGCN-LABEL: define {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+// CHECK-AMDGCN-LABEL: define {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
kernel void foo(image1d_t img) {
sampler_t smp = CLK_ADDRESS_CLAMP_TO_EDGE|CLK_NORMALIZED_COORDS_TRUE|CLK_FILTER_LINEAR;
- // CHECK-COM: alloca %opencl.sampler_t addrspace(2)*
+ // CHECK-SPIR: alloca %opencl.sampler_t addrspace(2)*
+ // CHECK-AMDGCN: alloca %opencl.sampler_t addrspace(4)*
event_t evt;
- // CHECK-COM: alloca %opencl.event_t*
+ // CHECK-SPIR: alloca %opencl.event_t*
+ // CHECK-AMDGCN: alloca %opencl.event_t addrspace(5)*
clk_event_t clk_evt;
// CHECK-SPIR: alloca %opencl.clk_event_t*
// CHECK-AMDGCN: alloca %opencl.clk_event_t addrspace(1)*
@@ -51,17 +53,24 @@ kernel void foo(image1d_t img) {
reserve_id_t rid;
// CHECK-SPIR: alloca %opencl.reserve_id_t*
// CHECK-AMDGCN: alloca %opencl.reserve_id_t addrspace(1)*
- // CHECK-COM: store %opencl.sampler_t addrspace(2)*
+ // CHECK-SPIR: store %opencl.sampler_t addrspace(2)*
+ // CHECK-AMDGCN: store %opencl.sampler_t addrspace(4)*
fnc4smp(smp);
- // CHECK-COM: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+ // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+ // CHECK-AMDGCN: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
fnc4smp(glb_smp);
- // CHECK-COM: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+ // CHECK-SPIR: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(2)*
+ // CHECK-AMDGCN: call {{.*}}void @fnc4smp(%opencl.sampler_t addrspace(4)*
}
-kernel void foo_pipe(read_only pipe int p) {}
-// CHECK-SPIR: @foo_pipe(%opencl.pipe_t addrspace(1)* %p)
-// CHECK_AMDGCN: @foo_pipe(%opencl.pipe_t addrspace(1)* %p)
+kernel void foo_ro_pipe(read_only pipe int p) {}
+// CHECK-SPIR: @foo_ro_pipe(%opencl.pipe_ro_t addrspace(1)* %p)
+// CHECK_AMDGCN: @foo_ro_pipe(%opencl.pipe_ro_t addrspace(1)* %p)
+
+kernel void foo_wo_pipe(write_only pipe int p) {}
+// CHECK-SPIR: @foo_wo_pipe(%opencl.pipe_wo_t addrspace(1)* %p)
+// CHECK_AMDGCN: @foo_wo_pipe(%opencl.pipe_wo_t addrspace(1)* %p)
void __attribute__((overloadable)) bad1(image1d_t b, image2d_t c, image2d_t d) {}
// CHECK-SPIR-LABEL: @{{_Z4bad114ocl_image1d_ro14ocl_image2d_roS0_|"\\01\?bad1@@\$\$J0YAXPAUocl_image1d_ro@@PAUocl_image2d_ro@@1@Z"}}
-// CHECK-AMDGCN-LABEL: @{{_Z4bad114ocl_image1d_ro14ocl_image2d_roS0_|"\\01\?bad1@@\$\$J0YAXPAUocl_image1d_ro@@PAUocl_image2d_ro@@1@Z"}}(%opencl.image1d_ro_t addrspace(2)*{{.*}}%opencl.image2d_ro_t addrspace(2)*{{.*}}%opencl.image2d_ro_t addrspace(2)*{{.*}})
+// CHECK-AMDGCN-LABEL: @{{_Z4bad114ocl_image1d_ro14ocl_image2d_roS0_|"\\01\?bad1@@\$\$J0YAXPAUocl_image1d_ro@@PAUocl_image2d_ro@@1@Z"}}(%opencl.image1d_ro_t addrspace(4)*{{.*}}%opencl.image2d_ro_t addrspace(4)*{{.*}}%opencl.image2d_ro_t addrspace(4)*{{.*}})
diff --git a/test/CodeGenOpenCL/partial_initializer.cl b/test/CodeGenOpenCL/partial_initializer.cl
index 454c82f89299..ee6be919a7fa 100644
--- a/test/CodeGenOpenCL/partial_initializer.cl
+++ b/test/CodeGenOpenCL/partial_initializer.cl
@@ -36,17 +36,17 @@ void f(void) {
// CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16
// CHECK: %[[v0:.*]] = bitcast [6 x [6 x float]]* %A to i8*
- // CHECK: call void @llvm.memset.p0i8.i32(i8* %[[v0]], i8 0, i32 144, i32 4, i1 false)
+ // CHECK: call void @llvm.memset.p0i8.i32(i8* align 4 %[[v0]], i8 0, i32 144, i1 false)
// CHECK: %[[v1:.*]] = bitcast i8* %[[v0]] to [6 x [6 x float]]*
- // CHECK: %[[v2:.*]] = getelementptr [6 x [6 x float]], [6 x [6 x float]]* %[[v1]], i32 0, i32 0
- // CHECK: %[[v3:.*]] = getelementptr [6 x float], [6 x float]* %[[v2]], i32 0, i32 0
- // CHECK: store float 1.000000e+00, float* %[[v3]]
- // CHECK: %[[v4:.*]] = getelementptr [6 x float], [6 x float]* %[[v2]], i32 0, i32 1
- // CHECK: store float 2.000000e+00, float* %[[v4]]
+ // CHECK: %[[v2:.*]] = getelementptr inbounds [6 x [6 x float]], [6 x [6 x float]]* %[[v1]], i32 0, i32 0
+ // CHECK: %[[v3:.*]] = getelementptr inbounds [6 x float], [6 x float]* %[[v2]], i32 0, i32 0
+ // CHECK: store float 1.000000e+00, float* %[[v3]], align 4
+ // CHECK: %[[v4:.*]] = getelementptr inbounds [6 x float], [6 x float]* %[[v2]], i32 0, i32 1
+ // CHECK: store float 2.000000e+00, float* %[[v4]], align 4
float A[6][6] = {1.0f, 2.0f};
// CHECK: %[[v5:.*]] = bitcast %struct.StrucTy* %S to i8*
- // CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* %[[v5]], i8 addrspace(2)* bitcast (%struct.StrucTy addrspace(2)* @f.S to i8 addrspace(2)*), i32 12, i32 4, i1 false)
+ // CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[v5]], i8 addrspace(2)* align 4 bitcast (%struct.StrucTy addrspace(2)* @f.S to i8 addrspace(2)*), i32 12, i1 false)
StrucTy S = {1, 2};
// CHECK: store <2 x i32> <i32 1, i32 2>, <2 x i32>* %[[compoundliteral1]], align 8
diff --git a/test/CodeGenOpenCL/pipe_builtin.cl b/test/CodeGenOpenCL/pipe_builtin.cl
index 4b4b4ef97ab2..d912fce5e95d 100644
--- a/test/CodeGenOpenCL/pipe_builtin.cl
+++ b/test/CodeGenOpenCL/pipe_builtin.cl
@@ -1,68 +1,76 @@
// RUN: %clang_cc1 -emit-llvm -cl-ext=+cl_khr_subgroups -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
-// CHECK: %opencl.pipe_t = type opaque
-// CHECK: %opencl.reserve_id_t = type opaque
+// CHECK-DAG: %opencl.pipe_ro_t = type opaque
+// CHECK-DAG: %opencl.pipe_wo_t = type opaque
+// CHECK-DAG: %opencl.reserve_id_t = type opaque
#pragma OPENCL EXTENSION cl_khr_subgroups : enable
void test1(read_only pipe int p, global int *ptr) {
- // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // CHECK: call i32 @__read_pipe_2(%opencl.pipe_ro_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
read_pipe(p, ptr);
- // CHECK: call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_ro_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = reserve_read_pipe(p, 2);
- // CHECK: call i32 @__read_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // CHECK: call i32 @__read_pipe_4(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
read_pipe(p, rid, 2, ptr);
- // CHECK: call void @__commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__commit_read_pipe(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
commit_read_pipe(p, rid);
}
void test2(write_only pipe int p, global int *ptr) {
- // CHECK: call i32 @__write_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // CHECK: call i32 @__write_pipe_2(%opencl.pipe_wo_t* %{{.*}}, i8* %{{.*}}, i32 4, i32 4)
write_pipe(p, ptr);
- // CHECK: call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_wo_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = reserve_write_pipe(p, 2);
- // CHECK: call i32 @__write_pipe_4(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
+ // CHECK: call i32 @__write_pipe_4(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 {{.*}}, i8* %{{.*}}, i32 4, i32 4)
write_pipe(p, rid, 2, ptr);
- // CHECK: call void @__commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__commit_write_pipe(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
commit_write_pipe(p, rid);
}
void test3(read_only pipe int p, global int *ptr) {
- // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_read_pipe(%opencl.pipe_ro_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = work_group_reserve_read_pipe(p, 2);
- // CHECK: call void @__work_group_commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__work_group_commit_read_pipe(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
work_group_commit_read_pipe(p, rid);
}
void test4(write_only pipe int p, global int *ptr) {
- // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__work_group_reserve_write_pipe(%opencl.pipe_wo_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = work_group_reserve_write_pipe(p, 2);
- // CHECK: call void @__work_group_commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__work_group_commit_write_pipe(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
work_group_commit_write_pipe(p, rid);
}
void test5(read_only pipe int p, global int *ptr) {
- // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_read_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_read_pipe(%opencl.pipe_ro_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = sub_group_reserve_read_pipe(p, 2);
- // CHECK: call void @__sub_group_commit_read_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__sub_group_commit_read_pipe(%opencl.pipe_ro_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
sub_group_commit_read_pipe(p, rid);
}
void test6(write_only pipe int p, global int *ptr) {
- // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_write_pipe(%opencl.pipe_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
+ // CHECK: call %opencl.reserve_id_t* @__sub_group_reserve_write_pipe(%opencl.pipe_wo_t* %{{.*}}, i32 {{.*}}, i32 4, i32 4)
reserve_id_t rid = sub_group_reserve_write_pipe(p, 2);
- // CHECK: call void @__sub_group_commit_write_pipe(%opencl.pipe_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call void @__sub_group_commit_write_pipe(%opencl.pipe_wo_t* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 4, i32 4)
sub_group_commit_write_pipe(p, rid);
}
-void test7(write_only pipe int p, global int *ptr) {
- // CHECK: call i32 @__get_pipe_num_packets(%opencl.pipe_t* %{{.*}}, i32 4, i32 4)
+void test7(read_only pipe int p, global int *ptr) {
+ // CHECK: call i32 @__get_pipe_num_packets_ro(%opencl.pipe_ro_t* %{{.*}}, i32 4, i32 4)
*ptr = get_pipe_num_packets(p);
- // CHECK: call i32 @__get_pipe_max_packets(%opencl.pipe_t* %{{.*}}, i32 4, i32 4)
+ // CHECK: call i32 @__get_pipe_max_packets_ro(%opencl.pipe_ro_t* %{{.*}}, i32 4, i32 4)
*ptr = get_pipe_max_packets(p);
}
-void test8(read_only pipe int r, write_only pipe int w, global int *ptr) {
+void test8(write_only pipe int p, global int *ptr) {
+ // CHECK: call i32 @__get_pipe_num_packets_wo(%opencl.pipe_wo_t* %{{.*}}, i32 4, i32 4)
+ *ptr = get_pipe_num_packets(p);
+ // CHECK: call i32 @__get_pipe_max_packets_wo(%opencl.pipe_wo_t* %{{.*}}, i32 4, i32 4)
+ *ptr = get_pipe_max_packets(p);
+}
+
+void test9(read_only pipe int r, write_only pipe int w, global int *ptr) {
// verify that return type is correctly casted to i1 value
// CHECK: %[[R:[0-9]+]] = call i32 @__read_pipe_2
// CHECK: icmp ne i32 %[[R]], 0
@@ -70,10 +78,16 @@ void test8(read_only pipe int r, write_only pipe int w, global int *ptr) {
// CHECK: %[[W:[0-9]+]] = call i32 @__write_pipe_2
// CHECK: icmp ne i32 %[[W]], 0
if (write_pipe(w, ptr)) *ptr = -1;
- // CHECK: %[[N:[0-9]+]] = call i32 @__get_pipe_num_packets
- // CHECK: icmp ne i32 %[[N]], 0
+ // CHECK: %[[NR:[0-9]+]] = call i32 @__get_pipe_num_packets_ro
+ // CHECK: icmp ne i32 %[[NR]], 0
if (get_pipe_num_packets(r)) *ptr = -1;
- // CHECK: %[[M:[0-9]+]] = call i32 @__get_pipe_max_packets
- // CHECK: icmp ne i32 %[[M]], 0
+ // CHECK: %[[NW:[0-9]+]] = call i32 @__get_pipe_num_packets_wo
+ // CHECK: icmp ne i32 %[[NW]], 0
+ if (get_pipe_num_packets(w)) *ptr = -1;
+ // CHECK: %[[MR:[0-9]+]] = call i32 @__get_pipe_max_packets_ro
+ // CHECK: icmp ne i32 %[[MR]], 0
+ if (get_pipe_max_packets(r)) *ptr = -1;
+ // CHECK: %[[MW:[0-9]+]] = call i32 @__get_pipe_max_packets_wo
+ // CHECK: icmp ne i32 %[[MW]], 0
if (get_pipe_max_packets(w)) *ptr = -1;
}
diff --git a/test/CodeGenOpenCL/pipe_types.cl b/test/CodeGenOpenCL/pipe_types.cl
index 7c11f74ad7b9..ba064c6d7557 100644
--- a/test/CodeGenOpenCL/pipe_types.cl
+++ b/test/CodeGenOpenCL/pipe_types.cl
@@ -1,34 +1,35 @@
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -o - %s | FileCheck %s
-// CHECK: %opencl.pipe_t = type opaque
+// CHECK: %opencl.pipe_ro_t = type opaque
+// CHECK: %opencl.pipe_wo_t = type opaque
typedef unsigned char __attribute__((ext_vector_type(3))) uchar3;
typedef int __attribute__((ext_vector_type(4))) int4;
void test1(read_only pipe int p) {
-// CHECK: define void @test1(%opencl.pipe_t* %p)
+// CHECK: define void @test1(%opencl.pipe_ro_t* %p)
reserve_id_t rid;
// CHECK: %rid = alloca %opencl.reserve_id_t
}
void test2(write_only pipe float p) {
-// CHECK: define void @test2(%opencl.pipe_t* %p)
+// CHECK: define void @test2(%opencl.pipe_wo_t* %p)
}
void test3(read_only pipe const int p) {
-// CHECK: define void @test3(%opencl.pipe_t* %p)
+// CHECK: define void @test3(%opencl.pipe_ro_t* %p)
}
void test4(read_only pipe uchar3 p) {
-// CHECK: define void @test4(%opencl.pipe_t* %p)
+// CHECK: define void @test4(%opencl.pipe_ro_t* %p)
}
void test5(read_only pipe int4 p) {
-// CHECK: define void @test5(%opencl.pipe_t* %p)
+// CHECK: define void @test5(%opencl.pipe_ro_t* %p)
}
typedef read_only pipe int MyPipe;
kernel void test6(MyPipe p) {
-// CHECK: define spir_kernel void @test6(%opencl.pipe_t* %p)
+// CHECK: define spir_kernel void @test6(%opencl.pipe_ro_t* %p)
}
struct Person {
@@ -41,7 +42,7 @@ void test_reserved_read_pipe(global struct Person *SDst,
read_only pipe struct Person SPipe) {
// CHECK: define void @test_reserved_read_pipe
read_pipe (SPipe, SDst);
- // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
+ // CHECK: call i32 @__read_pipe_2(%opencl.pipe_ro_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
read_pipe (SPipe, SDst);
- // CHECK: call i32 @__read_pipe_2(%opencl.pipe_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
+ // CHECK: call i32 @__read_pipe_2(%opencl.pipe_ro_t* %{{.*}}, i8* %{{.*}}, i32 16, i32 8)
}
diff --git a/test/CodeGenOpenCL/private-array-initialization.cl b/test/CodeGenOpenCL/private-array-initialization.cl
index a7d4a9891977..9aa058dcfacf 100644
--- a/test/CodeGenOpenCL/private-array-initialization.cl
+++ b/test/CodeGenOpenCL/private-array-initialization.cl
@@ -1,9 +1,33 @@
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -O0 -emit-llvm -o - | FileCheck -check-prefix=PRIVATE0 %s
+// RUN: %clang_cc1 %s -triple amdgcn-amd-amdhsa-unknown -O0 -emit-llvm -o - | FileCheck -check-prefix=PRIVATE5 %s
// CHECK: @test.arr = private unnamed_addr addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3], align 4
void test() {
__private int arr[] = {1, 2, 3};
-// CHECK: %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p2i8.i32(i8* %[[arr_i8_ptr]], i8 addrspace(2)* bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i32 4, i1 false)
+// PRIVATE0: %[[arr_i8_ptr:[0-9]+]] = bitcast [3 x i32]* %arr to i8*
+// PRIVATE0: call void @llvm.memcpy.p0i8.p2i8.i32(i8* align 4 %[[arr_i8_ptr]], i8 addrspace(2)* align 4 bitcast ([3 x i32] addrspace(2)* @test.arr to i8 addrspace(2)*), i32 12, i1 false)
+
+// PRIVATE5: %arr = alloca [3 x i32], align 4, addrspace(5)
+// PRIVATE5: %0 = bitcast [3 x i32] addrspace(5)* %arr to i8 addrspace(5)*
+// PRIVATE5: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 %0, i8 addrspace(4)* align 4 bitcast ([3 x i32] addrspace(4)* @test.arr to i8 addrspace(4)*), i64 12, i1 false)
+}
+
+__kernel void initializer_cast_is_valid_crash() {
+// PRIVATE0: %v512 = alloca [64 x i8], align 1
+// PRIVATE0: %0 = bitcast [64 x i8]* %v512 to i8*
+// PRIVATE0: call void @llvm.memset.p0i8.i32(i8* align 1 %0, i8 0, i32 64, i1 false)
+// PRIVATE0: %1 = bitcast i8* %0 to [64 x i8]*
+
+
+// PRIVATE5: %v512 = alloca [64 x i8], align 1, addrspace(5)
+// PRIVATE5: %0 = bitcast [64 x i8] addrspace(5)* %v512 to i8 addrspace(5)*
+// PRIVATE5: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* align 1 %0, i8 0, i64 64, i1 false)
+// PRIVATE5: %1 = bitcast i8 addrspace(5)* %0 to [64 x i8] addrspace(5)*
+ unsigned char v512[64] = {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00
+ };
}
diff --git a/test/CodeGenOpenCL/shifts.cl b/test/CodeGenOpenCL/shifts.cl
index 14cd7aff65be..7011a4234bb5 100644
--- a/test/CodeGenOpenCL/shifts.cl
+++ b/test/CodeGenOpenCL/shifts.cl
@@ -58,16 +58,17 @@ int4 vectorVectorTest(int4 a,int4 b) {
return f;
}
-//OPT: @vectorScalarTest
+//NOOPT-LABEL: @vectorScalarTest
int4 vectorScalarTest(int4 a,int b) {
- //OPT: [[SP0:%.+]] = insertelement <4 x i32> undef, i32 %b, i32 0
- //OPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> undef, <4 x i32> zeroinitializer
- //OPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], <i32 31, i32 31, i32 31, i32 31>
- //OPT-NEXT: [[VSC:%.+]] = shl <4 x i32> %a, [[VSM]]
+ //NOOPT: [[SP0:%.+]] = insertelement <4 x i32> undef
+ //NOOPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> undef, <4 x i32> zeroinitializer
+ //NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], <i32 31, i32 31, i32 31, i32 31>
+ //NOOPT: [[VSC:%.+]] = shl <4 x i32> [[VSS:%.+]], [[VSM]]
int4 c = a << b;
- //OPT-NEXT: [[VSF:%.+]] = add <4 x i32> [[VSC]], <i32 4, i32 4, i32 4, i32 4>
+ //NOOPT: [[VSF:%.+]] = shl <4 x i32> [[VSC1:%.+]], <i32 2, i32 2, i32 2, i32 2>
+ //NOOPT: [[VSA:%.+]] = add <4 x i32> [[VSC2:%.+]], [[VSF]]
int4 d = {1, 1, 1, 1};
int4 f = c + (d << 34);
- //OPT-NEXT: ret <4 x i32> [[VSF]]
+ //NOOPT: ret <4 x i32>
return f;
}
diff --git a/test/CodeGenOpenCL/size_t.cl b/test/CodeGenOpenCL/size_t.cl
index 02950bb66270..63a062268da3 100644
--- a/test/CodeGenOpenCL/size_t.cl
+++ b/test/CodeGenOpenCL/size_t.cl
@@ -1,12 +1,14 @@
// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple spir-unknown-unknown -o - | FileCheck --check-prefix=SZ32 %s
// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple spir64-unknown-unknown -o - | FileCheck --check-prefix=SZ64 --check-prefix=SZ64ONLY %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDONLY %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn---opencl -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDONLY %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDGCN %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -finclude-default-header -emit-llvm -O0 -triple amdgcn---opencl -o - | FileCheck --check-prefix=SZ64 --check-prefix=AMDGCN %s
//SZ32: define{{.*}} i32 @test_ptrtoint_private(i8* %x)
//SZ32: ptrtoint i8* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_ptrtoint_private(i8* %x)
-//SZ64: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_ptrtoint_private(i8* %x)
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_ptrtoint_private(i8 addrspace(5)* %x)
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
size_t test_ptrtoint_private(private char* x) {
return (size_t)x;
}
@@ -21,8 +23,10 @@ intptr_t test_ptrtoint_global(global char* x) {
//SZ32: define{{.*}} i32 @test_ptrtoint_constant(i8 addrspace(2)* %x)
//SZ32: ptrtoint i8 addrspace(2)* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_ptrtoint_constant(i8 addrspace(2)* %x)
-//SZ64: ptrtoint i8 addrspace(2)* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_ptrtoint_constant(i8 addrspace(2)* %x)
+//SZ64ONLY: ptrtoint i8 addrspace(2)* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_ptrtoint_constant(i8 addrspace(4)* %x)
+//AMDGCN: ptrtoint i8 addrspace(4)* %{{.*}} to i64
uintptr_t test_ptrtoint_constant(constant char* x) {
return (uintptr_t)x;
}
@@ -37,18 +41,21 @@ size_t test_ptrtoint_local(local char* x) {
//SZ32: define{{.*}} i32 @test_ptrtoint_generic(i8 addrspace(4)* %x)
//SZ32: ptrtoint i8 addrspace(4)* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_ptrtoint_generic(i8 addrspace(4)* %x)
-//SZ64: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_ptrtoint_generic(i8 addrspace(4)* %x)
+//SZ64ONLY: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_ptrtoint_generic(i8* %x)
+//AMDGCN: ptrtoint i8* %{{.*}} to i64
size_t test_ptrtoint_generic(generic char* x) {
return (size_t)x;
}
//SZ32: define{{.*}} i8* @test_inttoptr_private(i32 %x)
//SZ32: inttoptr i32 %{{.*}} to i8*
-//SZ64: define{{.*}} i8* @test_inttoptr_private(i64 %x)
-//AMDONLY: trunc i64 %{{.*}} to i32
-//AMDONLY: inttoptr i32 %{{.*}} to i8*
+//SZ64ONLY: define{{.*}} i8* @test_inttoptr_private(i64 %x)
//SZ64ONLY: inttoptr i64 %{{.*}} to i8*
+//AMDGCN: define{{.*}} i8 addrspace(5)* @test_inttoptr_private(i64 %x)
+//AMDGCN: trunc i64 %{{.*}} to i32
+//AMDGCN: inttoptr i32 %{{.*}} to i8 addrspace(5)*
private char* test_inttoptr_private(size_t x) {
return (private char*)x;
}
@@ -64,8 +71,8 @@ global char* test_inttoptr_global(size_t x) {
//SZ32: define{{.*}} i8 addrspace(3)* @test_add_local(i8 addrspace(3)* %x, i32 %y)
//SZ32: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i32
//SZ64: define{{.*}} i8 addrspace(3)* @test_add_local(i8 addrspace(3)* %x, i64 %y)
-//AMDONLY: trunc i64 %{{.*}} to i32
-//AMDONLY: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i32
+//AMDGCN: trunc i64 %{{.*}} to i32
+//AMDGCN: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i32
//SZ64ONLY: getelementptr inbounds i8, i8 addrspace(3)* %{{.*}}, i64
local char* test_add_local(local char* x, ptrdiff_t y) {
return x + y;
@@ -92,9 +99,12 @@ ptrdiff_t test_sub_local(local char* x, local char *y) {
//SZ32: define{{.*}} i32 @test_sub_private(i8* %x, i8* %y)
//SZ32: ptrtoint i8* %{{.*}} to i32
//SZ32: ptrtoint i8* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_sub_private(i8* %x, i8* %y)
-//SZ64: ptrtoint i8* %{{.*}} to i64
-//SZ64: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_sub_private(i8* %x, i8* %y)
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_sub_private(i8 addrspace(5)* %x, i8 addrspace(5)* %y)
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
ptrdiff_t test_sub_private(private char* x, private char *y) {
return x - y;
}
@@ -102,9 +112,12 @@ ptrdiff_t test_sub_private(private char* x, private char *y) {
//SZ32: define{{.*}} i32 @test_sub_mix(i8* %x, i8 addrspace(4)* %y)
//SZ32: ptrtoint i8* %{{.*}} to i32
//SZ32: ptrtoint i8 addrspace(4)* %{{.*}} to i32
-//SZ64: define{{.*}} i64 @test_sub_mix(i8* %x, i8 addrspace(4)* %y)
-//SZ64: ptrtoint i8* %{{.*}} to i64
-//SZ64: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//SZ64ONLY: define{{.*}} i64 @test_sub_mix(i8* %x, i8 addrspace(4)* %y)
+//SZ64ONLY: ptrtoint i8* %{{.*}} to i64
+//SZ64ONLY: ptrtoint i8 addrspace(4)* %{{.*}} to i64
+//AMDGCN: define{{.*}} i64 @test_sub_mix(i8 addrspace(5)* %x, i8* %y)
+//AMDGCN: ptrtoint i8 addrspace(5)* %{{.*}} to i64
+//AMDGCN: ptrtoint i8* %{{.*}} to i64
ptrdiff_t test_sub_mix(private char* x, generic char *y) {
return x - y;
}
diff --git a/test/CodeGenOpenCL/str_literals.cl b/test/CodeGenOpenCL/str_literals.cl
index 8fe5cefc8d75..514044cdcc7d 100644
--- a/test/CodeGenOpenCL/str_literals.cl
+++ b/test/CodeGenOpenCL/str_literals.cl
@@ -1,9 +1,15 @@
// RUN: %clang_cc1 %s -cl-opt-disable -emit-llvm -o - -ffake-address-space-map | FileCheck %s
-__constant char * __constant x = "hello world";
-__constant char * __constant y = "hello world";
+__constant char *__constant x = "hello world";
+__constant char *__constant y = "hello world";
-// CHECK: unnamed_addr addrspace(2) constant
+// CHECK: unnamed_addr addrspace(2) constant{{.*}}"hello world\00"
// CHECK-NOT: addrspace(2) unnamed_addr constant
-// CHECK: @x = addrspace(2) constant i8 addrspace(2)*
-// CHECK: @y = addrspace(2) constant i8 addrspace(2)*
+// CHECK: @x = {{(dso_local )?}}addrspace(2) constant i8 addrspace(2)*
+// CHECK: @y = {{(dso_local )?}}addrspace(2) constant i8 addrspace(2)*
+// CHECK: unnamed_addr addrspace(2) constant{{.*}}"f\00"
+
+void f() {
+ //CHECK: store i8 addrspace(2)* {{.*}}, i8 addrspace(2)**
+ constant const char *f3 = __func__;
+}
diff --git a/test/CodeGenOpenCL/vla.cl b/test/CodeGenOpenCL/vla.cl
index 5d3599fc3cce..f3d868a7b21b 100644
--- a/test/CodeGenOpenCL/vla.cl
+++ b/test/CodeGenOpenCL/vla.cl
@@ -1,13 +1,14 @@
// RUN: %clang_cc1 -emit-llvm -triple "spir-unknown-unknown" -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,SPIR %s
-// RUN: %clang_cc1 -emit-llvm -triple amdgcn-amd-amdhsa-opencl -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,SPIR %s
-// RUN: %clang_cc1 -emit-llvm -triple amdgcn-amd-amdhsa-amdgizcl -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,GIZ %s
+// RUN: %clang_cc1 -emit-llvm -triple amdgcn-amd-amdhsa -O0 -cl-std=CL2.0 -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
constant int sz0 = 5;
-// CHECK: @sz0 = addrspace(2) constant i32 5
+// SPIR: @sz0 = addrspace(2) constant i32 5
+// AMDGCN: @sz0 = addrspace(4) constant i32 5
const global int sz1 = 16;
// CHECK: @sz1 = addrspace(1) constant i32 16
const constant int sz2 = 8;
-// CHECK: @sz2 = addrspace(2) constant i32 8
+// SPIR: @sz2 = addrspace(2) constant i32 8
+// AMDGCN: @sz2 = addrspace(4) constant i32 8
// CHECK: @testvla.vla2 = internal addrspace(3) global [8 x i16] undef
kernel void testvla()
@@ -15,10 +16,10 @@ kernel void testvla()
int vla0[sz0];
// SPIR: %vla0 = alloca [5 x i32]
// SPIR-NOT: %vla0 = alloca [5 x i32]{{.*}}addrspace
-// GIZ: %vla0 = alloca [5 x i32]{{.*}}addrspace(5)
+// AMDGCN: %vla0 = alloca [5 x i32]{{.*}}addrspace(5)
char vla1[sz1];
// SPIR: %vla1 = alloca [16 x i8]
// SPIR-NOT: %vla1 = alloca [16 x i8]{{.*}}addrspace
-// GIZ: %vla1 = alloca [16 x i8]{{.*}}addrspace(5)
+// AMDGCN: %vla1 = alloca [16 x i8]{{.*}}addrspace(5)
local short vla2[sz2];
}