diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll')
-rw-r--r-- | test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll | 26 |
1 files changed, 13 insertions, 13 deletions
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll index fd1a463fd3e99..f0af876567b49 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK %s ; CHECK-LABEL: {{^}}test1: ; CHECK: v_cndmask_b32_e64 v0, 0, 1, exec @@ -7,7 +7,7 @@ ; there is no WQM use and therefore llvm.amdgcn.ps.live is constant. However, ; the expectation is that the intrinsic will be used in non-trivial shaders, ; so such an optimization doesn't seem worth the effort. -define amdgpu_ps float @test1() { +define amdgpu_ps float @test1() #0 { %live = call i1 @llvm.amdgcn.ps.live() %live.32 = zext i1 %live to i32 %r = bitcast i32 %live.32 to float @@ -19,12 +19,11 @@ define amdgpu_ps float @test1() { ; CHECK-DAG: s_wqm_b64 exec, exec ; CHECK-DAG: v_cndmask_b32_e64 [[VAR:v[0-9]+]], 0, 1, [[LIVE]] ; CHECK: image_sample v0, [[VAR]], -define amdgpu_ps float @test2() { +define amdgpu_ps float @test2() #0 { %live = call i1 @llvm.amdgcn.ps.live() %live.32 = zext i1 %live to i32 - - %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %live.32, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) - + %live.32.bc = bitcast i32 %live.32 to float + %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) %r = extractelement <4 x float> %t, i32 0 ret float %r } @@ -35,7 +34,7 @@ define amdgpu_ps float @test2() { ; CHECK-DAG: s_xor_b64 [[HELPER:s\[[0-9]+:[0-9]+\]]], [[LIVE]], -1 ; CHECK-DAG: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[HELPER]] ; CHECK: ; %dead -define amdgpu_ps float @test3(i32 %in) { +define amdgpu_ps float @test3(i32 %in) #0 { entry: %live = 
call i1 @llvm.amdgcn.ps.live() br i1 %live, label %end, label %dead @@ -46,14 +45,15 @@ dead: end: %tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ] - %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %tc, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) - + %tc.bc = bitcast i32 %tc to float + %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %tc.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0 %r = extractelement <4 x float> %t, i32 0 ret float %r } -declare i1 @llvm.amdgcn.ps.live() #0 - -declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 +declare i1 @llvm.amdgcn.ps.live() #1 +declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 -attributes #0 = { nounwind readnone } +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } |