diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-04-16 16:01:22 +0000 |
commit | 71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch) | |
tree | 5343938942df402b49ec7300a1c25a2d4ccd5821 /test/Transforms/LoopStrengthReduce/AMDGPU | |
parent | 31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff) |
Diffstat (limited to 'test/Transforms/LoopStrengthReduce/AMDGPU')
5 files changed, 127 insertions, 9 deletions
diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll new file mode 100644 index 000000000000..054c61d18795 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + +; Make sure the pointer / address space of AtomicRMW is considered + +; OPT-LABEL: @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32( + +; OPT-NOT: getelementptr + +; OPT: .lr.ph: +; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] +; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ] +; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383 +; OPT: %tmp4 = atomicrmw add i32 addrspace(3)* %scevgep4, i32 undef seq_cst +; OPT: %tmp7 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 undef seq_cst +; OPT: %0 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 %tmp8 seq_cst +; OPT: br i1 %exitcond +define amdgpu_kernel void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i32 %indvars.iv, 16383 + %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1 + %tmp4 = atomicrmw add i32 addrspace(3)* %tmp3, i32 undef seq_cst + %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv + %tmp7 = atomicrmw add i32 addrspace(3)* %tmp6, i32 undef seq_cst + %tmp8 = add nsw i32 %tmp7, %tmp4 + atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +; OPT-LABEL: test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32( +; OPT-NOT: getelementptr + +; OPT: .lr.ph: +; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] +; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ] +; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383 +; OPT: %tmp4 = cmpxchg i32 addrspace(3)* %scevgep4, i32 undef, i32 undef seq_cst monotonic +define amdgpu_kernel void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i32 %indvars.iv, 16383 + %tmp3 = getelementptr inbounds i32, i32 addrspace(3)* %arg1, i32 %tmp1 + %tmp4 = cmpxchg i32 addrspace(3)* %tmp3, i32 undef, i32 undef seq_cst monotonic + %tmp4.0 = extractvalue { i32, i1 } %tmp4, 0 + %tmp6 = getelementptr inbounds i32, i32 addrspace(3)* %arg0, i32 %indvars.iv + %tmp7 = cmpxchg i32 addrspace(3)* %tmp6, i32 undef, i32 undef seq_cst monotonic + %tmp7.0 = extractvalue { i32, i1 } %tmp7, 0 + %tmp8 = add nsw i32 %tmp7.0, %tmp4.0 + atomicrmw add i32 addrspace(3)* %tmp6, i32 %tmp8 seq_cst + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +attributes #0 = { nounwind }
\ No newline at end of file diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll index bf61112a3c3e..c5ea1b915d91 100644 --- a/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll +++ b/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll @@ -10,7 +10,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095 ; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1 -define void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge @@ -48,7 +48,7 @@ bb: ; OPT: {{^}}.lr.ph: ; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ] ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1 -define void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge @@ -83,7 +83,7 @@ bb: ; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535 ; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1 -define void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge @@ -122,7 +122,7 @@ bb: ; OPT: {{^}}.lr.ph: ; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ] ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1 -define void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll new file mode 100644 index 000000000000..02c3c05e7945 --- /dev/null +++ b/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +target triple = "amdgcn--" + +; We need to compile this for a target where we have different address spaces, +; and where pointers in those address spaces have different size. +; E.g. for amdgcn-- pointers in address space 0 are 32 bits and pointers in +; address space 1 are 64 bits. + +; We shouldn't crash. Check that we get a loop with the two stores. +;CHECK-LABEL: foo: +;CHECK: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: +;CHECK: buffer_store_dword +;CHECK: buffer_store_dword +;CHECK: s_branch [[LOOP_LABEL]] + +define amdgpu_kernel void @foo() { +entry: + br label %loop + +loop: + %idx0 = phi i32 [ %next_idx0, %loop ], [ 0, %entry ] + %0 = getelementptr inbounds i32, i32* null, i32 %idx0 + %1 = getelementptr inbounds i32, i32 addrspace(1)* null, i32 %idx0 + store i32 1, i32* %0 + store i32 7, i32 addrspace(1)* %1 + %next_idx0 = add nuw nsw i32 %idx0, 1 + br label %loop +} + diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll index 8c83df5843d2..67b1926bdf27 100644 --- a/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll +++ b/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll @@ -16,7 +16,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK: bb: ; CHECK: inttoptr i32 %lsr.iv.next2 to i8 addrspace(3)* ; CHECK: %c1 = icmp ne i8 addrspace(3)* -define void @local_cmp_user(i32 %arg0) nounwind { +define amdgpu_kernel void @local_cmp_user(i32 %arg0) nounwind { entry: br label %bb11 @@ -47,7 +47,7 @@ bb13: ; CHECK: bb: ; CHECK: inttoptr i64 %lsr.iv.next2 to i8 addrspace(1)* ; CHECK: icmp ne i8 addrspace(1)* %t -define void @global_cmp_user(i64 %arg0) nounwind { +define amdgpu_kernel void @global_cmp_user(i64 %arg0) nounwind { entry: br label %bb11 @@ -78,7 +78,7 @@ bb13: ; CHECK: bb: ; CHECK: %idxprom = sext i32 %lsr.iv1 to i64 ; CHECK: getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom -define void @global_gep_user(i32 %arg0) nounwind { +define amdgpu_kernel void @global_gep_user(i32 %arg0) nounwind { entry: br label %bb11 @@ -108,7 +108,7 @@ bb13: ; CHECK: bb ; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext -define void @global_sext_scale_user(i32 %arg0) nounwind { +define amdgpu_kernel void @global_sext_scale_user(i32 %arg0) nounwind { entry: br label %bb11 diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll index b3b696d42c59..9eba0c3051dc 100644 --- a/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll +++ b/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll @@ -14,7 +14,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK: %scevgep = getelementptr i32, i32 addrspace(3)* %tmp1, i32 4 ; CHECK:%tmp14 = load i32, i32 addrspace(3)* %scevgep -define void @lsr_crash_preserve_addrspace_unknown_type() #0 { +define amdgpu_kernel void @lsr_crash_preserve_addrspace_unknown_type() #0 { bb: br label %bb1 |