summaryrefslogtreecommitdiff
path: root/test/CodeGen/AMDGPU/valu-i1.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/AMDGPU/valu-i1.ll')
-rw-r--r--test/CodeGen/AMDGPU/valu-i1.ll97
1 files changed, 84 insertions, 13 deletions
diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll
index e64f8467240ae..85a8929ebe586 100644
--- a/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/test/CodeGen/AMDGPU/valu-i1.ll
@@ -29,7 +29,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; SI-NEXT: s_xor_b64 exec, exec, [[SAVE3]]
; SI-NEXT: ; mask branch
;
-define void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
+define amdgpu_kernel void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
switch i32 %tid, label %default [
@@ -64,29 +64,100 @@ end:
ret void
}
-; SI-LABEL: @simple_test_v_if
+; SI-LABEL: {{^}}simple_test_v_if:
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+; SI: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
-; SI: BB{{[0-9]+_[0-9]+}}:
+; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
; SI: buffer_store_dword
-; SI: s_endpgm
+; SI-NEXT: s_waitcnt
-; SI: BB1_2:
+; SI-NEXT: {{^}}[[EXIT]]:
; SI: s_or_b64 exec, exec, [[BR_SREG]]
; SI: s_endpgm
-define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+define amdgpu_kernel void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%is.0 = icmp ne i32 %tid, 0
- br i1 %is.0, label %store, label %exit
+ br i1 %is.0, label %then, label %exit
+
+then:
+ %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
+ store i32 999, i32 addrspace(1)* %gep
+ br label %exit
+
+exit:
+ ret void
+}
+
+; FIXME: It would be better to endpgm in the then block.
+
+; SI-LABEL: {{^}}simple_test_v_if_ret_else_ret:
+; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
+; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+; SI: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
+
+; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
+; SI: buffer_store_dword
+; SI-NEXT: s_waitcnt
+
+; SI-NEXT: {{^}}[[EXIT]]:
+; SI: s_or_b64 exec, exec, [[BR_SREG]]
+; SI: s_endpgm
+define amdgpu_kernel void @simple_test_v_if_ret_else_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %is.0 = icmp ne i32 %tid, 0
+ br i1 %is.0, label %then, label %exit
+
+then:
+ %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
+ store i32 999, i32 addrspace(1)* %gep
+ ret void
+
+exit:
+ ret void
+}
+
+; Final block has more than a ret to execute. This was miscompiled
+; before function exit blocks were unified since the endpgm would
+; terminate the then wavefront before reaching the store.
+
+; SI-LABEL: {{^}}simple_test_v_if_ret_else_code_ret:
+; SI: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
+; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+; SI: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
+
+; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %exit
+; SI: ds_write_b32
+; SI: s_waitcnt
+
+; SI-NEXT: {{^}}[[FLOW]]:
+; SI-NEXT: s_or_saveexec_b64
+; SI-NEXT: s_xor_b64 exec, exec
+; SI-NEXT: ; mask branch [[UNIFIED_RETURN:BB[0-9]+_[0-9]+]]
+
+; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %then
+; SI: buffer_store_dword
+; SI-NEXT: s_waitcnt
+
+; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock
+; SI: s_or_b64 exec, exec
+; SI: s_endpgm
+define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %is.0 = icmp ne i32 %tid, 0
+ br i1 %is.0, label %then, label %exit
-store:
+then:
%gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
store i32 999, i32 addrspace(1)* %gep
ret void
exit:
+ store volatile i32 7, i32 addrspace(3)* undef
ret void
}
@@ -101,12 +172,12 @@ exit:
; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]:
; SI: buffer_load_dword
; SI-DAG: buffer_store_dword
-; SI-DAG: s_cmpk_eq_i32 s{{[0-9]+}}, 0x100
-; SI: s_cbranch_scc0 [[LABEL_LOOP]]
+; SI-DAG: v_cmp_eq_u32_e32 vcc, 0x100
+; SI: s_cbranch_vccz [[LABEL_LOOP]]
; SI: [[LABEL_EXIT]]:
; SI: s_endpgm
-define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+define amdgpu_kernel void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%is.0 = icmp ne i32 %tid, 0
@@ -156,7 +227,7 @@ exit:
; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
; SI: buffer_store_dword
-; SI: v_cmp_ge_i64_e32 [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
+; SI: v_cmp_ge_i64_e{{32|64}} [[CMP:s\[[0-9]+:[0-9]+\]|vcc]]
; SI: s_or_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[CMP]], [[COND_STATE]]
; SI: [[LABEL_FLOW]]:
@@ -173,7 +244,7 @@ exit:
; SI-NOT: [[COND_STATE]]
; SI: s_endpgm
-define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
+define amdgpu_kernel void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tmp4 = sext i32 %tmp to i64