1 files changed, 47 insertions, 47 deletions
diff --git a/test/CodeGen/AMDGPU/sgpr-copy.ll b/test/CodeGen/AMDGPU/sgpr-copy.ll
index 5c20e9a8d585..931051102cd5 100644
--- a/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -4,13 +4,13 @@
 ; CHECK-LABEL: {{^}}phi1:
 ; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
 ; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
-define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @phi1(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
 main_body:
-  %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
-  %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
-  %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0)
-  %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
-  %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32)
+  %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+  %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+  %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
+  %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
+  %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
   %tmp24 = fptosi float %tmp22 to i32
   %tmp25 = icmp ne i32 %tmp24, 0
   br i1 %tmp25, label %ENDIF, label %ELSE
@@ -28,29 +28,29 @@ ENDIF:                                            ; preds = %ELSE, %main_body
 
 ; Make sure this program doesn't crash
 ; CHECK-LABEL: {{^}}phi2:
-define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
+define amdgpu_ps void @phi2(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #1 {
 main_body:
-  %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
-  %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
-  %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 16)
-  %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 32)
-  %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 36)
-  %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 40)
-  %tmp25 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 48)
-  %tmp26 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 52)
-  %tmp27 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 56)
-  %tmp28 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 64)
-  %tmp29 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 68)
-  %tmp30 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 72)
-  %tmp31 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 76)
-  %tmp32 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 80)
-  %tmp33 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 84)
-  %tmp34 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 88)
-  %tmp35 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 92)
+  %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+  %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+  %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 16)
+  %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 32)
+  %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 36)
+  %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 40)
+  %tmp25 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 48)
+  %tmp26 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 52)
+  %tmp27 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 56)
+  %tmp28 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 64)
+  %tmp29 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 68)
+  %tmp30 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 72)
+  %tmp31 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 76)
+  %tmp32 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 80)
+  %tmp33 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 84)
+  %tmp34 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 88)
+  %tmp35 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 92)
   %tmp36 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %arg2, i32 0
   %tmp37 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp36, !tbaa !0
-  %tmp38 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg1, i32 0
-  %tmp39 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp38, !tbaa !0
+  %tmp38 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg1, i32 0
+  %tmp39 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp38, !tbaa !0
   %i.i = extractelement <2 x i32> %arg5, i32 0
   %j.i = extractelement <2 x i32> %arg5, i32 1
   %i.f.i = bitcast i32 %i.i to float
@@ -85,7 +85,7 @@ main_body:
   %tmp46 = bitcast float %p2.i24 to i32
   %tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
   %tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
-  %tmp39.bc = bitcast <16 x i8> %tmp39 to <4 x i32>
+  %tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32>
   %a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float>
   %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
   %tmp50 = extractelement <4 x float> %tmp1, i32 2
@@ -173,14 +173,14 @@ ENDIF24:                                          ; preds = %IF25, %ENDIF
 
 ; We just want ot make sure the program doesn't crash
 ; CHECK-LABEL: {{^}}loop:
-define amdgpu_ps void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
+define amdgpu_ps void @loop(<4 x i32> addrspace(2)* inreg %arg, <4 x i32> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
 main_body:
-  %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
-  %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
-  %tmp21 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 0)
-  %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 4)
-  %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 8)
-  %tmp24 = call float @llvm.SI.load.const(<16 x i8> %tmp20, i32 12)
+  %tmp = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %arg, i32 0
+  %tmp20 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+  %tmp21 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 0)
+  %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 4)
+  %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 8)
+  %tmp24 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp20, i32 12)
   %tmp25 = fptosi float %tmp24 to i32
   %tmp26 = bitcast i32 %tmp25 to float
   %tmp27 = bitcast float %tmp26 to i32
@@ -226,17 +226,17 @@ ENDIF:                                            ; preds = %LOOP
 ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
 ; CHECK: exp
 ; CHECK: s_endpgm
-define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
 entry:
-  %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
-  %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
-  %tmp22 = call float @llvm.SI.load.const(<16 x i8> %tmp21, i32 16)
+  %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i64 0, i32 0
+  %tmp21 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !0
+  %tmp22 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp21, i32 16)
   %tmp23 = getelementptr [16 x <8 x i32>], [16 x <8 x i32>] addrspace(2)* %arg2, i64 0, i32 0
   %tmp24 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp23, !tbaa !0
-  %tmp25 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %arg1, i64 0, i32 0
-  %tmp26 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp25, !tbaa !0
+  %tmp25 = getelementptr [32 x <4 x i32>], [32 x <4 x i32>] addrspace(2)* %arg1, i64 0, i32 0
+  %tmp26 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp25, !tbaa !0
   %tmp27 = fcmp oeq float %tmp22, 0.000000e+00
-  %tmp26.bc = bitcast <16 x i8> %tmp26 to <4 x i32>
+  %tmp26.bc = bitcast <4 x i32> %tmp26 to <4 x i32>
   br i1 %tmp27, label %if, label %else
 
 if:                                               ; preds = %entry
@@ -290,7 +290,7 @@ endif:                                            ; preds = %if1, %if0, %entry
 ; This test is just checking that we don't crash / assertion fail.
 ; CHECK-LABEL: {{^}}copy2:
 ; CHECK: s_endpgm
-define amdgpu_ps void @copy2([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
+define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(2)* byval %arg, [32 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
 entry:
   br label %LOOP68
 
@@ -326,11 +326,11 @@ ENDIF69:                                          ; preds = %LOOP68
 ; [[END]]:
 ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
 ; CHECK: s_endpgm
-define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(2)* byval %arg, [17 x <4 x i32>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
 bb:
-  %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
-  %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !3
-  %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
+  %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg1, i32 0, i32 0
+  %tmp22 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp, !tbaa !3
+  %tmp23 = call float @llvm.SI.load.const.v4i32(<4 x i32> %tmp22, i32 16)
   %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
   %tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !3
   %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
@@ -420,7 +420,7 @@ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
 declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
 declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
-declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }