diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
commit | 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch) | |
tree | 5343938942df402b49ec7300a1c25a2d4ccd5821 /test/CodeGen/AMDGPU/cgp-addressing-modes.ll | |
parent | 31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff) |
Diffstat (limited to 'test/CodeGen/AMDGPU/cgp-addressing-modes.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/cgp-addressing-modes.ll | 222 |
1 files changed, 199 insertions, 23 deletions
diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index 2ed2857ff340..c1cf56e5058e 100644 --- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -5,15 +5,17 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + ; OPT-LABEL: @test_sink_global_small_offset_i32( ; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in ; OPT-VI: getelementptr i32, i32 addrspace(1)* %in ; OPT: br i1 -; OPT-CI: ptrtoint +; OPT-CI: getelementptr i8, ; GCN-LABEL: {{^}}test_sink_global_small_offset_i32: ; GCN: {{^}}BB0_2: -define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7 @@ -43,7 +45,7 @@ done: ; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} ; GCN: {{^}}BB1_2: ; GCN: s_or_b64 exec -define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999 %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535 @@ -70,7 +72,7 @@ done: ; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}} ; GCN: {{^}}BB2_2: ; GCN: s_or_b64 exec -define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024 %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095 @@ -97,7 +99,7 @@ done: ; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} ; GCN: {{^}}BB3_2: ; GCN: s_or_b64 exec -define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999 %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096 @@ -122,14 +124,55 @@ done: ; OPT-LABEL: @test_sink_scratch_small_offset_i32( ; OPT-NOT: getelementptr [512 x i32] ; OPT: br i1 -; OPT: ptrtoint +; OPT: getelementptr i8, ; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32: ; GCN: s_and_saveexec_b64 ; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}} ; GCN: {{^}}BB4_2: -define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { +define amdgpu_kernel void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { +entry: + %alloca = alloca [512 x i32], align 4 + %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998 + %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %add.arg = add i32 %arg, 8 + %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1022 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + store volatile i32 123, i32* %alloca.gep + %tmp1 = load volatile i32, i32* %alloca.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep.0 + %load = load volatile i32, i32* %alloca.gep + store i32 %load, i32 addrspace(1)* %out.gep.1 + br label %done + +done: + ret void +} + +; This ends up not fitting due to the reserved 4 bytes at offset 0 +; OPT-LABEL: @test_sink_scratch_small_offset_i32_reserved( +; OPT-NOT: getelementptr [512 x i32] +; OPT: br i1 +; OPT: getelementptr i8, + +; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32_reserved: +; GCN: s_and_saveexec_b64 +; GCN: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 4 +; GCN: buffer_store_dword {{v[0-9]+}}, [[BASE_FI0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} +; GCN: v_mov_b32_e32 [[BASE_FI1:v[0-9]+]], 4 +; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} +; GCN: {{^BB[0-9]+}}_2: + +define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { entry: %alloca = alloca [512 x i32], align 4 %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998 @@ -165,8 +208,8 @@ done: ; GCN: s_and_saveexec_b64 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} -; GCN: {{^}}BB5_2: -define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { +; GCN: {{^BB[0-9]+}}_2: +define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) { entry: %alloca = alloca [512 x i32], align 4 %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998 @@ -197,8 +240,8 @@ done: ; GCN: s_and_saveexec_b64 ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -; GCN: {{^}}BB6_2: -define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) { +; GCN: {{^BB[0-9]+}}_2: +define amdgpu_kernel void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) { entry: %offset.ext = zext i32 %offset to i64 %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 @@ -228,7 +271,7 @@ done: ; GCN: s_and_saveexec_b64 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}} ; GCN: s_or_b64 exec, exec -define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +define amdgpu_kernel void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7 @@ -257,7 +300,7 @@ done: ; GCN: s_and_saveexec_b64 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}} ; GCN: s_or_b64 exec, exec -define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255 @@ -290,7 +333,7 @@ done: ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} ; GCN: s_or_b64 exec, exec -define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +define amdgpu_kernel void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256 @@ -322,7 +365,7 @@ done: ; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} ; GCN: s_or_b64 exec, exec -define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295 @@ -353,7 +396,7 @@ done: ; GCN: s_addc_u32 ; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} ; GCN: s_or_b64 exec, exec -define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +define amdgpu_kernel void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181 @@ -383,7 +426,7 @@ done: ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}} ; GCN: s_or_b64 exec, exec -define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143 @@ -421,7 +464,7 @@ done: ; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} ; GCN: s_or_b64 exec, exec -define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +define amdgpu_kernel void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144 @@ -445,13 +488,13 @@ done: %struct.foo = type { [3 x float], [3 x float] } ; OPT-LABEL: @sink_ds_address( -; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i64 +; OPT: getelementptr i8, ; GCN-LABEL: {{^}}sink_ds_address: ; GCN: s_load_dword [[SREG1:s[0-9]+]], ; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]] ; GCN-DAG: ds_read2_b32 v[{{[0-9+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5 -define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind { +define amdgpu_kernel void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind { entry: %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2 @@ -476,9 +519,8 @@ bb34: ; OPT-LABEL: @test_sink_constant_small_max_mubuf_offset_load_i32_align_1( ; OPT: br i1 %tmp0, ; OPT: if: -; OPT: %sunkaddr = ptrtoint i8 addrspace(2)* %in to i64 -; OPT: %sunkaddr1 = add i64 %sunkaddr, 4095 -define void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { +; OPT: getelementptr i8, {{.*}} 4095 +define amdgpu_kernel void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { entry: %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024 %in.gep = getelementptr i8, i8 addrspace(2)* %in, i64 4095 @@ -500,7 +542,141 @@ done: ret void } +; OPT-LABEL: @test_sink_local_small_offset_atomicrmw_i32( +; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* +; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 +; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* +; OPT: %tmp1 = atomicrmw add i32 addrspace(3)* %1, i32 2 seq_cst +define amdgpu_kernel void @test_sink_local_small_offset_atomicrmw_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +entry: + %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 + %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = atomicrmw add i32 addrspace(3)* %in.gep, i32 2 seq_cst + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(3)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_local_small_offset_cmpxchg_i32( +; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* +; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 +; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* +; OPT: %tmp1.struct = cmpxchg i32 addrspace(3)* %1, i32 undef, i32 2 seq_cst monotonic +define amdgpu_kernel void @test_sink_local_small_offset_cmpxchg_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +entry: + %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 + %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1.struct = cmpxchg i32 addrspace(3)* %in.gep, i32 undef, i32 2 seq_cst monotonic + %tmp1 = extractvalue { i32, i1 } %tmp1.struct, 0 + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(3)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_wrong_operand_local_small_offset_cmpxchg_i32( +; OPT: %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 +; OPT: br i1 +; OPT: cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic +define amdgpu_kernel void @test_wrong_operand_local_small_offset_cmpxchg_i32(i32 addrspace(3)* addrspace(3)* %out, i32 addrspace(3)* %in) { +entry: + %out.gep = getelementptr i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* %out, i32 999999 + %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1.struct = cmpxchg i32 addrspace(3)* addrspace(3)* undef, i32 addrspace(3)* %in.gep, i32 addrspace(3)* undef seq_cst monotonic + %tmp1 = extractvalue { i32 addrspace(3)*, i1 } %tmp1.struct, 0 + br label %endif + +endif: + %x = phi i32 addrspace(3)* [ %tmp1, %if ], [ null, %entry ] + store i32 addrspace(3)* %x, i32 addrspace(3)* addrspace(3)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32( +; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* +; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 +; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* +; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %1, i32 2, i32 0, i32 0, i1 false) +define amdgpu_kernel void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +entry: + %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 + %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2, i32 0, i32 0, i1 false) + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(3)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32( +; OPT: %0 = bitcast i32 addrspace(3)* %in to i8 addrspace(3)* +; OPT: %sunkaddr = getelementptr i8, i8 addrspace(3)* %0, i32 28 +; OPT: %1 = bitcast i8 addrspace(3)* %sunkaddr to i32 addrspace(3)* +; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %1, i32 2, i32 0, i32 0, i1 false) +define amdgpu_kernel void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +entry: + %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 + %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2, i32 0, i32 0, i1 false) + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(3)* %out.gep + br label %done + +done: + ret void +} + declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 +declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } +attributes #2 = { nounwind argmemonly } |