summaryrefslogtreecommitdiff
path: root/test/CodeGen/AMDGPU/wqm.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/AMDGPU/wqm.ll')
-rw-r--r--test/CodeGen/AMDGPU/wqm.ll51
1 files changed, 44 insertions, 7 deletions
diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll
index 4c9a8d5a938c..3d1818368487 100644
--- a/test/CodeGen/AMDGPU/wqm.ll
+++ b/test/CodeGen/AMDGPU/wqm.ll
@@ -260,8 +260,9 @@ main_body:
}
; Check that WWM is turned on correctly across basic block boundaries.
+; if..then..endif version
;
-;CHECK-LABEL: {{^}}test_wwm6:
+;CHECK-LABEL: {{^}}test_wwm6_then:
;CHECK: s_or_saveexec_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], -1
;SI-CHECK: buffer_load_dword
;VI-CHECK: flat_load_dword
@@ -272,7 +273,7 @@ main_body:
;VI-CHECK: flat_load_dword
;CHECK: v_add_f32_e32
;CHECK: s_mov_b64 exec, [[ORIG2]]
-define amdgpu_ps float @test_wwm6() {
+define amdgpu_ps float @test_wwm6_then() {
main_body:
%src0 = load volatile float, float addrspace(1)* undef
; use mbcnt to make sure the branch is divergent
@@ -292,6 +293,40 @@ endif:
ret float %out.1
}
+; Check that WWM is turned on correctly across basic block boundaries.
+; loop version
+;
+;CHECK-LABEL: {{^}}test_wwm6_loop:
+;CHECK: s_or_saveexec_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], -1
+;SI-CHECK: buffer_load_dword
+;VI-CHECK: flat_load_dword
+;CHECK: s_mov_b64 exec, [[ORIG]]
+;CHECK: %loop
+;CHECK: s_or_saveexec_b64 [[ORIG2:s\[[0-9]+:[0-9]+\]]], -1
+;SI-CHECK: buffer_load_dword
+;VI-CHECK: flat_load_dword
+;CHECK: s_mov_b64 exec, [[ORIG2]]
+define amdgpu_ps float @test_wwm6_loop() {
+main_body:
+ %src0 = load volatile float, float addrspace(1)* undef
+ ; use mbcnt to make sure the branch is divergent
+ %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+ %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
+ br label %loop
+
+loop:
+ %counter = phi i32 [ %lo, %main_body ], [ %counter.1, %loop ]
+ %src1 = load volatile float, float addrspace(1)* undef
+ %out = fadd float %src0, %src1
+ %out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
+ %counter.1 = sub i32 %counter, 1
+ %cc = icmp ne i32 %counter.1, 0
+ br i1 %cc, label %loop, label %endloop
+
+endloop:
+ ret float %out.0
+}
+
; Check that @llvm.amdgcn.set.inactive disables WWM.
;
;CHECK-LABEL: {{^}}test_set_inactive1:
@@ -551,7 +586,8 @@ main_body:
%data.0 = extractelement <2 x float> %data, i32 0
call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0)
- call void @llvm.AMDGPU.kill(float %z)
+ %z.cmp = fcmp olt float %z, 0.0
+ call void @llvm.amdgcn.kill(i1 %z.cmp)
%idx.1 = extractelement <2 x i32> %idx, i32 1
%data.1 = extractelement <2 x float> %data, i32 1
@@ -584,7 +620,8 @@ main_body:
call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
- call void @llvm.AMDGPU.kill(float %z)
+ %z.cmp = fcmp olt float %z, 0.0
+ call void @llvm.amdgcn.kill(i1 %z.cmp)
ret <4 x float> %dtex
}
@@ -610,11 +647,11 @@ main_body:
; CHECK: image_store
; CHECK: s_wqm_b64 exec, exec
; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0
-; CHECK-DAG: v_mov_b32_e32 [[SEVEN:v[0-9]+]], 0x40e00000
+; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000
; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body
; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
-; CHECK: v_cmp_gt_f32_e32 vcc, [[CTR]], [[SEVEN]]
+; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]]
; CHECK: s_cbranch_vccz [[LOOPHDR]]
; CHECK: ; %break
@@ -791,7 +828,7 @@ declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i3
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare void @llvm.AMDGPU.kill(float) #1
+declare void @llvm.amdgcn.kill(i1) #1
declare float @llvm.amdgcn.wqm.f32(float) #3
declare i32 @llvm.amdgcn.wqm.i32(i32) #3
declare float @llvm.amdgcn.wwm.f32(float) #3