Diffstat (limited to 'test/CodeGen/X86/vec_int_to_fp.ll')
-rw-r--r--  test/CodeGen/X86/vec_int_to_fp.ll | 286
1 file changed, 144 insertions, 142 deletions
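
The hunks below update the expected SSE sequences for 64-bit integer to float vector conversions. For reference, the affected tests exercise IR of roughly this shape (a sketch reconstructed from the hunk headers; the exact bodies are in the test file itself):

define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
  %cvt = sitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %cvt
}

define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
  %cvt = uitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %cvt
}

In the updated CHECK lines the two converted lane pairs are concatenated with unpcklpd (or shufps in the undef variants) rather than interleaved through an extra scratch register.
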
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index a42b3c96c3ae..7cb1c95cb01a 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1169,16 +1169,16 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-LABEL: sitofp_4i64_to_4f32_undef:
; SSE: # BB#0:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -1368,21 +1368,22 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-LABEL: sitofp_4i64_to_4f32:
; SSE: # BB#0:
; SSE-NEXT: movq %xmm1, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_4i64_to_4f32:
@@ -1838,21 +1839,14 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-LABEL: uitofp_4i64_to_4f32_undef:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: js .LBB41_2
-; SSE-NEXT: # BB#1:
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: .LBB41_2:
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: js .LBB41_3
-; SSE-NEXT: # BB#4:
+; SSE-NEXT: js .LBB41_1
+; SSE-NEXT: # BB#2:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: jmp .LBB41_5
-; SSE-NEXT: .LBB41_3:
+; SSE-NEXT: jmp .LBB41_3
+; SSE-NEXT: .LBB41_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
@@ -1860,17 +1854,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: addss %xmm0, %xmm0
-; SSE-NEXT: .LBB41_5:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: .LBB41_3:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: js .LBB41_6
-; SSE-NEXT: # BB#7:
+; SSE-NEXT: js .LBB41_4
+; SSE-NEXT: # BB#5:
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: jmp .LBB41_8
-; SSE-NEXT: .LBB41_6:
+; SSE-NEXT: jmp .LBB41_6
+; SSE-NEXT: .LBB41_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
@@ -1878,9 +1871,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: addss %xmm1, %xmm1
-; SSE-NEXT: .LBB41_8:
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: .LBB41_6:
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: js .LBB41_8
+; SSE-NEXT: # BB#7:
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: .LBB41_8:
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
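
The shrq/andl $1/orq/cvtsi2ssq/addss blocks in the uitofp hunks implement the usual unsigned 64-bit to float lowering: a value that is non-negative as a signed integer goes straight through cvtsi2ssq, while a value with the top bit set is halved with its low bit folded back in, converted, and then doubled. A scalar sketch of that per-lane logic (hypothetical helper for illustration only; not part of the test file):

define float @u64_to_f32(i64 %x) {
  %neg = icmp slt i64 %x, 0
  br i1 %neg, label %big, label %small
small:                               ; fits a signed 64-bit convert
  %s = sitofp i64 %x to float
  ret float %s
big:                                 ; x >= 2^63: halve, keep the low bit for rounding
  %hi = lshr i64 %x, 1
  %lo = and i64 %x, 1
  %half = or i64 %hi, %lo
  %h = sitofp i64 %half to float
  %d = fadd float %h, %h             ; double back to the original magnitude
  ret float %d
}
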
@@ -2149,32 +2149,32 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB47_1
; SSE-NEXT: # BB#2:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: jmp .LBB47_3
; SSE-NEXT: .LBB47_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB47_3:
-; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB47_4
; SSE-NEXT: # BB#5:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
; SSE-NEXT: jmp .LBB47_6
; SSE-NEXT: .LBB47_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: addss %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
; SSE-NEXT: .LBB47_6:
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB47_7
; SSE-NEXT: # BB#8:
@@ -2208,9 +2208,9 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB47_12:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_4i64_to_4f32:
@@ -3381,22 +3381,23 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE-LABEL: sitofp_load_4i64_to_4f32:
; SSE: # BB#0:
; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm2
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_load_4i64_to_4f32:
@@ -3546,41 +3547,42 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE-LABEL: sitofp_load_8i64_to_8f32:
; SSE: # BB#0:
; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm2
-; SSE-NEXT: movdqa 32(%rdi), %xmm3
-; SSE-NEXT: movdqa 48(%rdi), %xmm4
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm5
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
+; SSE-NEXT: movdqa 32(%rdi), %xmm2
+; SSE-NEXT: movdqa 48(%rdi), %xmm3
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: movq %xmm4, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: xorps %xmm4, %xmm4
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; SSE-NEXT: movq %xmm2, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT: movq %xmm3, %rax
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_load_8i64_to_8f32:
@@ -3822,73 +3824,73 @@ define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) {
define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE-LABEL: uitofp_load_4i64_to_4f32:
; SSE: # BB#0:
-; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm3
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: movdqa (%rdi), %xmm2
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_1
; SSE-NEXT: # BB#2:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: jmp .LBB76_3
; SSE-NEXT: .LBB76_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: addss %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm1
; SSE-NEXT: .LBB76_3:
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_4
; SSE-NEXT: # BB#5:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
; SSE-NEXT: jmp .LBB76_6
; SSE-NEXT: .LBB76_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
; SSE-NEXT: .LBB76_6:
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_7
; SSE-NEXT: # BB#8:
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: jmp .LBB76_9
; SSE-NEXT: .LBB76_7:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB76_9:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_10
; SSE-NEXT: # BB#11:
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: jmp .LBB76_12
; SSE-NEXT: .LBB76_10:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: addss %xmm1, %xmm1
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB76_12:
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_load_4i64_to_4f32:
@@ -4186,121 +4188,121 @@ define <4 x float> @uitofp_load_4i8_to_4f32(<4 x i8> *%a) {
define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE-LABEL: uitofp_load_8i64_to_8f32:
; SSE: # BB#0:
-; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm5
+; SSE-NEXT: movdqa (%rdi), %xmm5
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
; SSE-NEXT: movdqa 32(%rdi), %xmm2
-; SSE-NEXT: movdqa 48(%rdi), %xmm3
-; SSE-NEXT: movq %xmm5, %rax
+; SSE-NEXT: movdqa 48(%rdi), %xmm1
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_1
; SSE-NEXT: # BB#2:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
; SSE-NEXT: jmp .LBB80_3
; SSE-NEXT: .LBB80_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm4
-; SSE-NEXT: addss %xmm4, %xmm4
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
; SSE-NEXT: .LBB80_3:
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_4
; SSE-NEXT: # BB#5:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
; SSE-NEXT: jmp .LBB80_6
; SSE-NEXT: .LBB80_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: addss %xmm4, %xmm4
; SSE-NEXT: .LBB80_6:
-; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; SSE-NEXT: movq %xmm5, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_7
; SSE-NEXT: # BB#8:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm6
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: jmp .LBB80_9
; SSE-NEXT: .LBB80_7:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm6
-; SSE-NEXT: addss %xmm6, %xmm6
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB80_9:
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
+; SSE-NEXT: movq %xmm5, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_10
; SSE-NEXT: # BB#11:
-; SSE-NEXT: xorps %xmm5, %xmm5
-; SSE-NEXT: cvtsi2ssq %rax, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm6
; SSE-NEXT: jmp .LBB80_12
; SSE-NEXT: .LBB80_10:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm5, %xmm5
-; SSE-NEXT: cvtsi2ssq %rax, %xmm5
-; SSE-NEXT: addss %xmm5, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm6
+; SSE-NEXT: addss %xmm6, %xmm6
; SSE-NEXT: .LBB80_12:
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_13
; SSE-NEXT: # BB#14:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm7
+; SSE-NEXT: xorps %xmm5, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm5
; SSE-NEXT: jmp .LBB80_15
; SSE-NEXT: .LBB80_13:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm7
-; SSE-NEXT: addss %xmm7, %xmm7
+; SSE-NEXT: xorps %xmm5, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm5
+; SSE-NEXT: addss %xmm5, %xmm5
; SSE-NEXT: .LBB80_15:
-; SSE-NEXT: movq %xmm2, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_16
; SSE-NEXT: # BB#17:
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm7
; SSE-NEXT: jmp .LBB80_18
; SSE-NEXT: .LBB80_16:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: addss %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm7
+; SSE-NEXT: addss %xmm7, %xmm7
; SSE-NEXT: .LBB80_18:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_19
; SSE-NEXT: # BB#20:
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: jmp .LBB80_21
; SSE-NEXT: .LBB80_19:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm1
; SSE-NEXT: .LBB80_21:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
@@ -4318,8 +4320,8 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB80_24:
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_load_8i64_to_8f32: