Diffstat (limited to 'test/CodeGen/X86/rotate-extract-vector.ll')
-rw-r--r--  test/CodeGen/X86/rotate-extract-vector.ll | 122
1 file changed, 46 insertions(+), 76 deletions(-)
diff --git a/test/CodeGen/X86/rotate-extract-vector.ll b/test/CodeGen/X86/rotate-extract-vector.ll
index 6059a76259ba..e2679dded8b5 100644
--- a/test/CodeGen/X86/rotate-extract-vector.ll
+++ b/test/CodeGen/X86/rotate-extract-vector.ll
@@ -12,10 +12,10 @@
define <4 x i32> @vroll_v4i32_extract_shl(<4 x i32> %i) {
; CHECK-LABEL: vroll_v4i32_extract_shl:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpslld $3, %xmm0, %xmm1
-; CHECK-NEXT: vpslld $10, %xmm0, %xmm0
-; CHECK-NEXT: vpsrld $25, %xmm1, %xmm1
-; CHECK-NEXT: vpor %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vpslld $3, %xmm0, %xmm0
+; CHECK-NEXT: vprold $7, %zmm0, %zmm0
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT: vzeroupper
; CHECK-NEXT: ret{{[l|q]}}
%lhs_mul = shl <4 x i32> %i, <i32 3, i32 3, i32 3, i32 3>
%rhs_mul = shl <4 x i32> %i, <i32 10, i32 10, i32 10, i32 10>
@@ -25,20 +25,12 @@ define <4 x i32> @vroll_v4i32_extract_shl(<4 x i32> %i) {
}

define <4 x i64> @vrolq_v4i64_extract_shrl(<4 x i64> %i) nounwind {
-; X86-LABEL: vrolq_v4i64_extract_shrl:
-; X86: # %bb.0:
-; X86-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; X86-NEXT: vprolq $24, %zmm0, %zmm0
-; X86-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm0
-; X86-NEXT: retl
-;
-; X64-LABEL: vrolq_v4i64_extract_shrl:
-; X64: # %bb.0:
-; X64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; X64-NEXT: vprolq $24, %zmm0, %zmm0
-; X64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [18446744073189457919,18446744073189457919,18446744073189457919,18446744073189457919]
-; X64-NEXT: vpand %ymm1, %ymm0, %ymm0
-; X64-NEXT: retq
+; CHECK-LABEL: vrolq_v4i64_extract_shrl:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsrlq $5, %ymm0, %ymm0
+; CHECK-NEXT: vprolq $29, %zmm0, %zmm0
+; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; CHECK-NEXT: ret{{[l|q]}}
%lhs_div = lshr <4 x i64> %i, <i64 40, i64 40, i64 40, i64 40>
%rhs_div = lshr <4 x i64> %i, <i64 5, i64 5, i64 5, i64 5>
%rhs_shift = shl <4 x i64> %rhs_div, <i64 29, i64 29, i64 29, i64 29>
@@ -49,12 +41,10 @@ define <4 x i64> @vrolq_v4i64_extract_shrl(<4 x i64> %i) nounwind {

define <8 x i32> @vroll_extract_mul(<8 x i32> %i) nounwind {
; CHECK-LABEL: vroll_extract_mul:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm1 = [640,640,640,640,640,640,640,640]
-; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm1
-; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm2 = [10,10,10,10,10,10,10,10]
-; CHECK-NEXT: vpmulld %ymm2, %ymm0, %ymm0
-; CHECK-NEXT: vpsrld $26, %ymm0, %ymm0
-; CHECK-NEXT: vpor %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10]
+; CHECK-NEXT: vpmulld %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vprold $6, %zmm0, %zmm0
+; CHECK-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; CHECK-NEXT: ret{{[l|q]}}
%lhs_mul = mul <8 x i32> %i, <i32 640, i32 640, i32 640, i32 640, i32 640, i32 640, i32 640, i32 640>
%rhs_mul = mul <8 x i32> %i, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
@@ -66,7 +56,7 @@ define <8 x i32> @vroll_extract_mul(<8 x i32> %i) nounwind {

define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
; X86-LABEL: vrolq_extract_udiv:
; X86: # %bb.0:
-; X86-NEXT: subl $60, %esp
+; X86-NEXT: subl $44, %esp
; X86-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X86-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT: vmovss %xmm0, (%esp)
@@ -85,53 +75,27 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
; X86-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; X86-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
-; X86-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; X86-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: vmovss %xmm0, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $384, {{[0-9]+}}(%esp) # imm = 0x180
-; X86-NEXT: calll __udivdi3
-; X86-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; X86-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: vextractps $2, %xmm0, (%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $384, {{[0-9]+}}(%esp) # imm = 0x180
-; X86-NEXT: vmovd %eax, %xmm0
-; X86-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
-; X86-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; X86-NEXT: calll __udivdi3
-; X86-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; X86-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
-; X86-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
-; X86-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
-; X86-NEXT: vpsllq $57, %xmm1, %xmm1
-; X86-NEXT: vpor %xmm0, %xmm1, %xmm0
-; X86-NEXT: addl $60, %esp
+; X86-NEXT: vprolq $57, %zmm0, %zmm0
+; X86-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; X86-NEXT: addl $44, %esp
+; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
; X64-LABEL: vrolq_extract_udiv:
; X64: # %bb.0:
; X64-NEXT: vpextrq $1, %xmm0, %rax
-; X64-NEXT: movabsq $-6148914691236517205, %rsi # imm = 0xAAAAAAAAAAAAAAAB
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: vmovq %rax, %xmm1
+; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
+; X64-NEXT: mulq %rcx
+; X64-NEXT: shrq %rdx
+; X64-NEXT: vmovq %rdx, %xmm1
; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: mulq %rsi
-; X64-NEXT: movq %rdx, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: mulq %rcx
+; X64-NEXT: shrq %rdx
+; X64-NEXT: vmovq %rdx, %xmm0
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X64-NEXT: shrq $8, %rcx
-; X64-NEXT: vmovq %rcx, %xmm1
-; X64-NEXT: shrq $8, %rdx
-; X64-NEXT: vmovq %rdx, %xmm2
-; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; X64-NEXT: vpsllq $57, %xmm0, %xmm0
-; X64-NEXT: vpor %xmm1, %xmm0, %xmm0
+; X64-NEXT: vprolq $57, %zmm0, %zmm0
+; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; X64-NEXT: vzeroupper
; X64-NEXT: retq
%lhs_div = udiv <2 x i64> %i, <i64 3, i64 3>
%rhs_div = udiv <2 x i64> %i, <i64 384, i64 384>
@@ -141,17 +105,23 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
}

define <4 x i32> @vrolw_extract_mul_with_mask(<4 x i32> %i) nounwind {
-; CHECK-LABEL: vrolw_extract_mul_with_mask:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1152,1152,1152,1152]
-; CHECK-NEXT: vpmulld %xmm1, %xmm0, %xmm1
-; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [9,9,9,9]
-; CHECK-NEXT: vpmulld %xmm2, %xmm0, %xmm0
-; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [160,160,160,160]
-; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpsrld $25, %xmm0, %xmm0
-; CHECK-NEXT: vpor %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: ret{{[l|q]}}
+; X86-LABEL: vrolw_extract_mul_with_mask:
+; X86: # %bb.0:
+; X86-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
+; X86-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X86-NEXT: vprold $7, %zmm0, %zmm0
+; X86-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; X64-LABEL: vrolw_extract_mul_with_mask:
+; X64: # %bb.0:
+; X64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
+; X64-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; X64-NEXT: vprold $7, %zmm0, %zmm0
+; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
%lhs_mul = mul <4 x i32> %i, <i32 1152, i32 1152, i32 1152, i32 1152>
%rhs_mul = mul <4 x i32> %i, <i32 9, i32 9, i32 9, i32 9>
%lhs_and = and <4 x i32> %lhs_mul, <i32 160, i32 160, i32 160, i32 160>