Diffstat (limited to 'test/CodeGen/X86/rotate-extract-vector.ll')
-rw-r--r--  test/CodeGen/X86/rotate-extract-vector.ll | 122
1 file changed, 46 insertions, 76 deletions
diff --git a/test/CodeGen/X86/rotate-extract-vector.ll b/test/CodeGen/X86/rotate-extract-vector.ll
index 6059a76259ba..e2679dded8b5 100644
--- a/test/CodeGen/X86/rotate-extract-vector.ll
+++ b/test/CodeGen/X86/rotate-extract-vector.ll
@@ -12,10 +12,10 @@
 define <4 x i32> @vroll_v4i32_extract_shl(<4 x i32> %i) {
 ; CHECK-LABEL: vroll_v4i32_extract_shl:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpslld $3, %xmm0, %xmm1
-; CHECK-NEXT:    vpslld $10, %xmm0, %xmm0
-; CHECK-NEXT:    vpsrld $25, %xmm1, %xmm1
-; CHECK-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    vpslld $3, %xmm0, %xmm0
+; CHECK-NEXT:    vprold $7, %zmm0, %zmm0
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    ret{{[l|q]}}
   %lhs_mul = shl <4 x i32> %i, <i32 3, i32 3, i32 3, i32 3>
   %rhs_mul = shl <4 x i32> %i, <i32 10, i32 10, i32 10, i32 10>
@@ -25,20 +25,12 @@ define <4 x i32> @vroll_v4i32_extract_shl(<4 x i32> %i) {
 }
 
 define <4 x i64> @vrolq_v4i64_extract_shrl(<4 x i64> %i) nounwind {
-; X86-LABEL: vrolq_v4i64_extract_shrl:
-; X86:       # %bb.0:
-; X86-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; X86-NEXT:    vprolq $24, %zmm0, %zmm0
-; X86-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
-; X86-NEXT:    retl
-;
-; X64-LABEL: vrolq_v4i64_extract_shrl:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; X64-NEXT:    vprolq $24, %zmm0, %zmm0
-; X64-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [18446744073189457919,18446744073189457919,18446744073189457919,18446744073189457919]
-; X64-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; X64-NEXT:    retq
+; CHECK-LABEL: vrolq_v4i64_extract_shrl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpsrlq $5, %ymm0, %ymm0
+; CHECK-NEXT:    vprolq $29, %zmm0, %zmm0
+; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; CHECK-NEXT:    ret{{[l|q]}}
   %lhs_div = lshr <4 x i64> %i, <i64 40, i64 40, i64 40, i64 40>
   %rhs_div = lshr <4 x i64> %i, <i64 5, i64 5, i64 5, i64 5>
   %rhs_shift = shl <4 x i64> %rhs_div, <i64 29, i64 29, i64 29, i64 29>
@@ -49,12 +41,10 @@ define <4 x i64> @vrolq_v4i64_extract_shrl(<4 x i64> %i) nounwind {
 define <8 x i32> @vroll_extract_mul(<8 x i32> %i) nounwind {
 ; CHECK-LABEL: vroll_extract_mul:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [640,640,640,640,640,640,640,640]
-; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm1
-; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [10,10,10,10,10,10,10,10]
-; CHECK-NEXT:    vpmulld %ymm2, %ymm0, %ymm0
-; CHECK-NEXT:    vpsrld $26, %ymm0, %ymm0
-; CHECK-NEXT:    vpor %ymm0, %ymm1, %ymm0
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [10,10,10,10,10,10,10,10]
+; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vprold $6, %zmm0, %zmm0
+; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %lhs_mul = mul <8 x i32> %i, <i32 640, i32 640, i32 640, i32 640, i32 640, i32 640, i32 640, i32 640>
   %rhs_mul = mul <8 x i32> %i, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
@@ -66,7 +56,7 @@ define <8 x i32> @vroll_extract_mul(<8 x i32> %i) nounwind {
 define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
 ; X86-LABEL: vrolq_extract_udiv:
 ; X86:       # %bb.0:
-; X86-NEXT:    subl $60, %esp
+; X86-NEXT:    subl $44, %esp
 ; X86-NEXT:    vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
 ; X86-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    vmovss %xmm0, (%esp)
@@ -85,53 +75,27 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
 ; X86-NEXT:    vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
 ; X86-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
 ; X86-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
-; X86-NEXT:    vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; X86-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; X86-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT:    vmovss %xmm0, (%esp)
-; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl $384, {{[0-9]+}}(%esp) # imm = 0x180
-; X86-NEXT:    calll __udivdi3
-; X86-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; X86-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT:    vextractps $2, %xmm0, (%esp)
-; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl $384, {{[0-9]+}}(%esp) # imm = 0x180
-; X86-NEXT:    vmovd %eax, %xmm0
-; X86-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
-; X86-NEXT:    vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; X86-NEXT:    calll __udivdi3
-; X86-NEXT:    vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
-; X86-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
-; X86-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0
-; X86-NEXT:    vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
-; X86-NEXT:    vpsllq $57, %xmm1, %xmm1
-; X86-NEXT:    vpor %xmm0, %xmm1, %xmm0
-; X86-NEXT:    addl $60, %esp
+; X86-NEXT:    vprolq $57, %zmm0, %zmm0
+; X86-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; X86-NEXT:    addl $44, %esp
+; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: vrolq_extract_udiv:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vpextrq $1, %xmm0, %rax
-; X64-NEXT:    movabsq $-6148914691236517205, %rsi # imm = 0xAAAAAAAAAAAAAAAB
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rdx, %rax
-; X64-NEXT:    shrq %rax
-; X64-NEXT:    vmovq %rax, %xmm1
+; X64-NEXT:    movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    shrq %rdx
+; X64-NEXT:    vmovq %rdx, %xmm1
 ; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    mulq %rsi
-; X64-NEXT:    movq %rdx, %rax
-; X64-NEXT:    shrq %rax
-; X64-NEXT:    vmovq %rax, %xmm0
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    shrq %rdx
+; X64-NEXT:    vmovq %rdx, %xmm0
 ; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X64-NEXT:    shrq $8, %rcx
-; X64-NEXT:    vmovq %rcx, %xmm1
-; X64-NEXT:    shrq $8, %rdx
-; X64-NEXT:    vmovq %rdx, %xmm2
-; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; X64-NEXT:    vpsllq $57, %xmm0, %xmm0
-; X64-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; X64-NEXT:    vprolq $57, %zmm0, %zmm0
+; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
   %lhs_div = udiv <2 x i64> %i, <i64 3, i64 3>
   %rhs_div = udiv <2 x i64> %i, <i64 384, i64 384>
@@ -141,17 +105,23 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
 }
 
 define <4 x i32> @vrolw_extract_mul_with_mask(<4 x i32> %i) nounwind {
-; CHECK-LABEL: vrolw_extract_mul_with_mask:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1152,1152,1152,1152]
-; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm1
-; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [9,9,9,9]
-; CHECK-NEXT:    vpmulld %xmm2, %xmm0, %xmm0
-; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [160,160,160,160]
-; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; CHECK-NEXT:    vpsrld $25, %xmm0, %xmm0
-; CHECK-NEXT:    vpor %xmm0, %xmm1, %xmm0
-; CHECK-NEXT:    ret{{[l|q]}}
+; X86-LABEL: vrolw_extract_mul_with_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
+; X86-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vprold $7, %zmm0, %zmm0
+; X86-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: vrolw_extract_mul_with_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [9,9,9,9]
+; X64-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
+; X64-NEXT:    vprold $7, %zmm0, %zmm0
+; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
   %lhs_mul = mul <4 x i32> %i, <i32 1152, i32 1152, i32 1152, i32 1152>
   %rhs_mul = mul <4 x i32> %i, <i32 9, i32 9, i32 9, i32 9>
   %lhs_and = and <4 x i32> %lhs_mul, <i32 160, i32 160, i32 160, i32 160>
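Context note (not part of the commit): these tests exercise the rotate-extract combine, where two shifts (or multiplies/divides by powers of two) of the same value, joined by an or, are recognized as a rotate of a cheaper sub-expression and selected as a single vprold/vprolq on AVX512 targets. A minimal sketch of the underlying pattern in LLVM IR, with a hypothetical function name; the shift amounts sum to the element width (7 + 25 = 32), so the or is a rotate-left by 7:

; Hypothetical example, not from the patch. With an AVX512-enabled llc this
; is expected to become a single vprold (via zmm plus a kill/vzeroupper when
; VL is unavailable, as in the checks above).
define <4 x i32> @rol7_v4i32(<4 x i32> %x) {
  %hi = shl <4 x i32> %x, <i32 7, i32 7, i32 7, i32 7>
  %lo = lshr <4 x i32> %x, <i32 25, i32 25, i32 25, i32 25>
  %rot = or <4 x i32> %hi, %lo
  ret <4 x i32> %rot
}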
