summary refs log tree commit diff
path: root/test/CodeGen/AMDGPU/sgpr-copy.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/AMDGPU/sgpr-copy.ll')
-rw-r--r-- test/CodeGen/AMDGPU/sgpr-copy.ll 94
1 files changed, 47 insertions, 47 deletions
diff --git a/test/CodeGen/AMDGPU/sgpr-copy.ll b/test/CodeGen/AMDGPU/sgpr-copy.ll
index 5c20e9a8d585..931051102cd5 100644
--- a/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -4,13 +4,13 @@
; CHECK-LABEL: {{^}}phi1:
; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
-define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @phi1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0)
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
%tmp24 = fptosi float %tmp22 to i32
%tmp25 = icmp ne i32 %tmp24, 0
br i1 %tmp25, label %ENDIF, label %ELSE
@@ -28,29 +28,29 @@ ENDIF: ; preds = %ELSE, %main_body
; Make sure this program doesn't crash
; CHECK-LABEL: {{^}}phi2:
-define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
+define amdgpu_ps void @phi2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32)
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 36)
- %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 40)
- %tmp25 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 48)
- %tmp26 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 52)
- %tmp27 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 56)
- %tmp28 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 64)
- %tmp29 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 68)
- %tmp30 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 72)
- %tmp31 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 76)
- %tmp32 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 80)
- %tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 84)
- %tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 88)
- %tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 92)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 36)
+ %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 40)
+ %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 48)
+ %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 52)
+ %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 56)
+ %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 64)
+ %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 68)
+ %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 72)
+ %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 76)
+ %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 80)
+ %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 84)
+ %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 88)
+ %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 92)
%tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
%tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
- %tmp38 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
- %tmp39 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp38, !tbaa !0
+ %tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
+ %tmp39 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp38, !tbaa !0
%i.i = extractelement <2 x i32> %arg5, i32 0
%j.i = extractelement <2 x i32> %arg5, i32 1
%i.f.i = bitcast i32 %i.i to float
@@ -85,7 +85,7 @@ main_body:
%tmp46 = bitcast float %p2.i24 to i32
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
%tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
- %tmp39.bc = bitcast <16 x i8> %tmp39 to <4 x i32>
+ %tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32>
%a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float>
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp50 = extractelement <4 x float> %tmp1, i32 2
@@ -173,14 +173,14 @@ ENDIF24: ; preds = %IF25, %ENDIF
; We just want to make sure the program doesn't crash
; CHECK-LABEL: {{^}}loop:
-define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @loop(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
main_body:
- %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
- %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0)
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 4)
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 8)
- %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 12)
+ %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+ %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 4)
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 8)
+ %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 12)
%tmp25 = fptosi float %tmp24 to i32
%tmp26 = bitcast i32 %tmp25 to float
%tmp27 = bitcast float %tmp26 to i32
@@ -226,17 +226,17 @@ ENDIF: ; preds = %LOOP
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
; CHECK: exp
; CHECK: s_endpgm
-define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
- %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
- %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
- %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 16)
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
+ %tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+ %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 16)
%tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
%tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
- %tmp25 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
- %tmp26 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp25, !tbaa !0
+ %tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
+ %tmp26 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp25, !tbaa !0
%tmp27 = fcmp oeq float %tmp22, 0.000000e+00
- %tmp26.bc = bitcast <16 x i8> %tmp26 to <4 x i32>
+ %tmp26.bc = bitcast <4 x i32> %tmp26 to <4 x i32>
br i1 %tmp27, label %if, label %else
if: ; preds = %entry
@@ -290,7 +290,7 @@ endif: ; preds = %if1, %if0, %entry
; This test is just checking that we don't crash / assertion fail.
; CHECK-LABEL: {{^}}copy2:
; CHECK: s_endpgm
-define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
entry:
br label %LOOP68
@@ -326,11 +326,11 @@ ENDIF69: ; preds = %LOOP68
; [[END]]:
; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
; CHECK: s_endpgm
-define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
- %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
- %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !3
- %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
+ %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i32 0, i32 0
+ %tmp22 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !3
+ %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp22, i32 16)
%tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
%tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !3
%tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
@@ -420,7 +420,7 @@ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }