diff options
Diffstat (limited to 'test/CodeGenOpenCL/atomic-ops.cl')
| -rw-r--r-- | test/CodeGenOpenCL/atomic-ops.cl | 291 |
1 files changed, 291 insertions, 0 deletions
diff --git a/test/CodeGenOpenCL/atomic-ops.cl b/test/CodeGenOpenCL/atomic-ops.cl new file mode 100644 index 0000000000000..160f7fbd528d9 --- /dev/null +++ b/test/CodeGenOpenCL/atomic-ops.cl @@ -0,0 +1,291 @@ +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa-amdgizcl | opt -instnamer -S | FileCheck %s + +// Also test serialization of atomic operations here, to avoid duplicating the test. +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa-amdgizcl +// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa-amdgizcl -emit-llvm -o - | opt -instnamer -S | FileCheck %s + +#ifndef ALREADY_INCLUDED +#define ALREADY_INCLUDED + +typedef __INTPTR_TYPE__ intptr_t; +typedef int int8 __attribute__((ext_vector_type(8))); + +typedef enum memory_order { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +#endif +} memory_scope; + +atomic_int j; + +void fi1(atomic_int *i) { + // CHECK-LABEL: @fi1 + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); + + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst + x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device); + + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst + x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices); + + // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("subgroup") seq_cst + x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group); +} + +void fi2(atomic_int *i) { + // CHECK-LABEL: @fi2 + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group); +} + +void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) { + // CHECK-LABEL: @test_addr + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group); + + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group); + + // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group); +} + +void fi3(atomic_int *i, atomic_uint *ui) { + // CHECK-LABEL: @fi3 + // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group); + + // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group); + + // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group); + + // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group); + + // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst + x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group); +} + +bool fi4(atomic_int *i) { + // CHECK-LABEL: @fi4( + // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire + // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0 + // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1 + // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]] + // CHECK: store i32 [[OLD]] + int cmp = 0; + return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group); +} + +void fi5(atomic_int *i, int scope) { + // CHECK-LABEL: @fi5 + // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [ + // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]] + // CHECK-NEXT: i32 2, label %[[opencl_device:.*]] + // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]] + // CHECK-NEXT: ] + // CHECK: [[opencl_workgroup]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst + // CHECK: br label %[[continue:.*]] + // CHECK: [[opencl_device]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst + // CHECK: br label %[[continue]] + // CHECK: [[opencl_allsvmdevices]]: + // CHECK: load atomic i32, i32* %{{.*}} seq_cst + // CHECK: br label %[[continue]] + // CHECK: [[opencl_subgroup]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst + // CHECK: br label %[[continue]] + // CHECK: [[continue]]: + int x = __opencl_atomic_load(i, memory_order_seq_cst, scope); +} + +void fi6(atomic_int *i, int order, int scope) { + // CHECK-LABEL: @fi6 + // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [ + // CHECK-NEXT: i32 1, label %[[acquire:.*]] + // CHECK-NEXT: i32 2, label %[[acquire:.*]] + // CHECK-NEXT: i32 5, label %[[seqcst:.*]] + // CHECK-NEXT: ] + // CHECK: [[monotonic]]: + // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [ + // CHECK-NEXT: i32 1, label %[[MON_WG:.*]] + // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]] + // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]] + // CHECK-NEXT: ] + // CHECK: [[acquire]]: + // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [ + // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]] + // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]] + // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]] + // CHECK-NEXT: ] + // CHECK: [[seqcst]]: + // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [ + // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]] + // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]] + // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]] + // CHECK-NEXT: ] + // CHECK: [[MON_WG]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic + // CHECK: [[MON_DEV]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic + // CHECK: [[MON_ALL]]: + // CHECK: load atomic i32, i32* %{{.*}} monotonic + // CHECK: [[MON_SUB]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") monotonic + // CHECK: [[ACQ_WG]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire + // CHECK: [[ACQ_DEV]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire + // CHECK: [[ACQ_ALL]]: + // CHECK: load atomic i32, i32* %{{.*}} acquire + // CHECK: [[ACQ_SUB]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") acquire + // CHECK: [[SEQ_WG]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst + // CHECK: [[SEQ_DEV]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst + // CHECK: [[SEQ_ALL]]: + // CHECK: load atomic i32, i32* %{{.*}} seq_cst + // CHECK: [[SEQ_SUB]]: + // CHECK: load atomic i32, i32* %{{.*}} syncscope("subgroup") seq_cst + int x = __opencl_atomic_load(i, order, scope); +} + +float ff1(global atomic_float *d) { + // CHECK-LABEL: @ff1 + // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic + return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group); +} + +void ff2(atomic_float *d) { + // CHECK-LABEL: @ff2 + // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release + __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group); +} + +float ff3(atomic_float *d) { + // CHECK-LABEL: @ff3 + // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst + return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group); +} + +// CHECK-LABEL: @atomic_init_foo +void atomic_init_foo() +{ + // CHECK-NOT: atomic + // CHECK: store + __opencl_atomic_init(&j, 42); + + // CHECK-NOT: atomic + // CHECK: } +} + +// CHECK-LABEL: @failureOrder +void failureOrder(atomic_int *ptr, int *ptr2) { + // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic + __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group); + + // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire + __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group); +} + +// CHECK-LABEL: @generalFailureOrder +void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) { + __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group); + // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[ACQUIRE]] + // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]] + + // CHECK: [[MONOTONIC]] + // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: ] + + // CHECK: [[ACQUIRE]] + // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: ] + + // CHECK: [[RELEASE]] + // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: ] + + // CHECK: [[ACQREL]] + // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: ] + + // CHECK: [[SEQCST]] + // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [ + // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]] + // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]] + // CHECK-NEXT: ] + + // CHECK: [[MONOTONIC_MONOTONIC]] + // CHECK: cmpxchg {{.*}} monotonic monotonic + // CHECK: br + + // CHECK: [[ACQUIRE_MONOTONIC]] + // CHECK: cmpxchg {{.*}} acquire monotonic + // CHECK: br + + // CHECK: [[ACQUIRE_ACQUIRE]] + // CHECK: cmpxchg {{.*}} acquire acquire + // CHECK: br + + // CHECK: [[ACQREL_MONOTONIC]] + // CHECK: cmpxchg {{.*}} acq_rel monotonic + // CHECK: br + + // CHECK: [[ACQREL_ACQUIRE]] + // CHECK: cmpxchg {{.*}} acq_rel acquire + // CHECK: br + + // CHECK: [[SEQCST_MONOTONIC]] + // CHECK: cmpxchg {{.*}} seq_cst monotonic + // CHECK: br + + // CHECK: [[SEQCST_ACQUIRE]] + // CHECK: cmpxchg {{.*}} seq_cst acquire + // CHECK: br + + // CHECK: [[SEQCST_SEQCST]] + // CHECK: cmpxchg {{.*}} seq_cst seq_cst + // CHECK: br +} + +int test_volatile(volatile atomic_int *i) { + // CHECK-LABEL: @test_volatile + // CHECK: %[[i_addr:.*]] = alloca i32 + // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32 + // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]] + // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]] + // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst + // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]] + // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]] + // CHECK-NEXT: ret i32 %[[retval]] + return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group); +} + +#endif |
