diff options
Diffstat (limited to 'test/CodeGen/AMDGPU/sgpr-copy.ll')
| -rw-r--r-- | test/CodeGen/AMDGPU/sgpr-copy.ll | 94 |
1 files changed, 47 insertions, 47 deletions
diff --git a/test/CodeGen/AMDGPU/sgpr-copy.ll b/test/CodeGen/AMDGPU/sgpr-copy.ll index 5c20e9a8d585..931051102cd5 100644 --- a/test/CodeGen/AMDGPU/sgpr-copy.ll +++ b/test/CodeGen/AMDGPU/sgpr-copy.ll @@ -4,13 +4,13 @@ ; CHECK-LABEL: {{^}}phi1: ; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0 ; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]] -define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 { +define amdgpu_ps void @phi1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 { main_body: - %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0 - %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0 - %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0) - %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16) - %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32) + %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0 + %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0 + %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0) + %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16) + %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32) %tmp24 = fptosi float %tmp22 to i32 %tmp25 = icmp ne i32 %tmp24, 0 br i1 %tmp25, label %ENDIF, label %ELSE @@ -28,29 +28,29 @@ ENDIF: ; preds = %ELSE, %main_body ; Make sure this program doesn't crash ; CHECK-LABEL: {{^}}phi2: -define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 { +define amdgpu_ps void @phi2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 { main_body: - %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0 - %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0 - %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16) - %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32) - %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 36) - %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 40) - %tmp25 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 48) - %tmp26 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 52) - %tmp27 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 56) - %tmp28 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 64) - %tmp29 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 68) - %tmp30 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 72) - %tmp31 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 76) - %tmp32 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 80) - %tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 84) - %tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 88) - %tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 92) + %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0 + %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0 + %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16) + %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32) + %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 36) + %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 40) + %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 48) + %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 52) + %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 56) + %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 64) + %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 68) + %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 72) + %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 76) + %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 80) + %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 84) + %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 88) + %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 92) %tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0 %tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0 - %tmp38 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0 - %tmp39 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp38, !tbaa !0 + %tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0 + %tmp39 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp38, !tbaa !0 %i.i = extractelement <2 x i32> %arg5, i32 0 %j.i = extractelement <2 x i32> %arg5, i32 1 %i.f.i = bitcast i32 %i.i to float @@ -85,7 +85,7 @@ main_body: %tmp46 = bitcast float %p2.i24 to i32 %tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0 %tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1 - %tmp39.bc = bitcast <16 x i8> %tmp39 to <4 x i32> + %tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32> %a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float> %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) %tmp50 = extractelement <4 x float> %tmp1, i32 2 @@ -173,14 +173,14 @@ ENDIF24: ; preds = %IF25, %ENDIF ; We just want ot make sure the program doesn't crash ; CHECK-LABEL: {{^}}loop: -define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 { +define amdgpu_ps void @loop(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 { main_body: - %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0 - %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0 - %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0) - %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 4) - %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 8) - %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 12) + %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0 + %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0 + %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0) + %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 4) + %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 8) + %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 12) %tmp25 = fptosi float %tmp24 to i32 %tmp26 = bitcast i32 %tmp25 to float %tmp27 = bitcast float %tmp26 to i32 @@ -226,17 +226,17 @@ ENDIF: ; preds = %LOOP ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}} ; CHECK: exp ; CHECK: s_endpgm -define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { +define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { entry: - %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0 - %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0 - %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 16) + %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0 + %tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0 + %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 16) %tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0 %tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0 - %tmp25 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0 - %tmp26 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp25, !tbaa !0 + %tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0 + %tmp26 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp25, !tbaa !0 %tmp27 = fcmp oeq float %tmp22, 0.000000e+00 - %tmp26.bc = bitcast <16 x i8> %tmp26 to <4 x i32> + %tmp26.bc = bitcast <4 x i32> %tmp26 to <4 x i32> br i1 %tmp27, label %if, label %else if: ; preds = %entry @@ -290,7 +290,7 @@ endif: ; preds = %if1, %if0, %entry ; This test is just checking that we don't crash / assertion fail. ; CHECK-LABEL: {{^}}copy2: ; CHECK: s_endpgm -define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { +define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { entry: br label %LOOP68 @@ -326,11 +326,11 @@ ENDIF69: ; preds = %LOOP68 ; [[END]]: ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}} ; CHECK: s_endpgm -define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 { +define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 { bb: - %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0 - %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !3 - %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16) + %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i32 0, i32 0 + %tmp22 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !3 + %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp22, i32 16) %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0 %tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !3 %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0 @@ -420,7 +420,7 @@ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2 -declare float @llvm.SI.load.const(<16 x i8>, i32) #1 +declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } |
