diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll | 34 |
1 file changed, 17 insertions, 17 deletions
diff --git a/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll index e4f6e72e69773..ac2f7b4a4a4b3 100644 --- a/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll +++ b/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll @@ -4,25 +4,22 @@ ; If spilling to smem, additional registers are used for the resource ; descriptor. -; ALL-LABEL: {{^}}max_12_sgprs: +; ALL-LABEL: {{^}}max_9_sgprs: -; FIXME: Should be ablo to skip this copying of the private segment -; buffer because all the SGPR spills are to VGPRs. - -; ALL: s_mov_b64 s[10:11], s[2:3] -; ALL: s_mov_b64 s[8:9], s[0:1] ; ALL: SGPRBlocks: 1 -; ALL: NumSGPRsForWavesPerEU: 14 -define void @max_12_sgprs(i32 addrspace(1)* %out1, +; ALL: NumSGPRsForWavesPerEU: 9 +define amdgpu_kernel void @max_9_sgprs(i32 addrspace(1)* %out1, i32 addrspace(1)* %out2, i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, - i32 %one, i32 %two, i32 %three, i32 %four) #0 { + i32 addrspace(1)* %out5, + i32 %one, i32 %two, i32 %three, i32 %four, i32 %five) #0 { store i32 %one, i32 addrspace(1)* %out1 store i32 %two, i32 addrspace(1)* %out2 store i32 %three, i32 addrspace(1)* %out3 store i32 %four, i32 addrspace(1)* %out4 + store i32 %five, i32 addrspace(1)* %out5 ret void } @@ -52,23 +49,26 @@ define void @max_12_sgprs(i32 addrspace(1)* %out1, ; TOSMEM: SGPRBlocks: 1 ; TOSMEM: NumSGPRsForWavesPerEU: 16 -define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1, +define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1, i32 addrspace(1)* %out2, i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 %one, i32 %two, i32 %three, i32 %four) #2 { - store volatile i32 0, i32* undef %x.0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %x.0, i32 addrspace(1)* undef %x.1 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %x.0, i32 addrspace(1)* undef %x.2 = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %x.0, i32 addrspace(1)* undef %x.3 = call i64 
@llvm.amdgcn.dispatch.id() - store volatile i64 %x.3, i64 addrspace(1)* undef %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() - store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr() + store volatile i32 0, i32* undef + br label %stores + +stores: + store volatile i32 %x.0, i32 addrspace(1)* undef + store volatile i32 %x.0, i32 addrspace(1)* undef + store volatile i32 %x.0, i32 addrspace(1)* undef + store volatile i64 %x.3, i64 addrspace(1)* undef + store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef store volatile i8 addrspace(2)* %x.5, i8 addrspace(2)* addrspace(1)* undef store i32 %one, i32 addrspace(1)* %out1 @@ -90,7 +90,7 @@ define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1, ; XALL: SGPRBlocks: 2 ; XALL: NumSGPRsForWavesPerEU: 18 -;define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1, +;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1, ; i32 addrspace(1)* %out2, ; i32 addrspace(1)* %out3, ; i32 addrspace(1)* %out4, |