Diffstat (limited to 'test/CodeGen/X86/avx512-insert-extract.ll')
-rw-r--r-- | test/CodeGen/X86/avx512-insert-extract.ll | 163
1 file changed, 55 insertions, 108 deletions
diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll
index 7e0b981b2c6a9..7477e05f0c7f9 100644
--- a/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/test/CodeGen/X86/avx512-insert-extract.ll
@@ -793,11 +793,10 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
 ; KNL-NEXT: cmpl %esi, %edi
 ; KNL-NEXT: setb %al
 ; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k1
-; KNL-NEXT: movl {{.*}}(%rip), %ecx
-; KNL-NEXT: vpbroadcastd %ecx, %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
 ; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k1
-; KNL-NEXT: vpbroadcastd %ecx, %zmm1 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
 ; KNL-NEXT: vpmovdb %zmm1, %xmm1
 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
@@ -1432,8 +1431,7 @@ define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) {
 ; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
 ; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT: andl $15, %edi
-; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movb (%rdi,%rax), %al
+; CHECK-NEXT: movb -24(%rsp,%rdi), %al
 ; CHECK-NEXT: retq
 %t2 = extractelement <16 x i8> %t1, i32 %index
 ret i8 %t2
@@ -1452,8 +1450,7 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) {
 ; CHECK-NEXT: ## kill: def %edi killed %edi def %rdi
 ; CHECK-NEXT: vmovaps %ymm0, (%rsp)
 ; CHECK-NEXT: andl $31, %edi
-; CHECK-NEXT: movq %rsp, %rax
-; CHECK-NEXT: movb (%rdi,%rax), %al
+; CHECK-NEXT: movb (%rsp,%rdi), %al
 ; CHECK-NEXT: movq %rbp, %rsp
 ; CHECK-NEXT: popq %rbp
 ; CHECK-NEXT: vzeroupper
@@ -1477,8 +1474,7 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
 ; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovaps %ymm0, (%rsp)
 ; KNL-NEXT: andl $63, %edi
-; KNL-NEXT: movq %rsp, %rax
-; KNL-NEXT: movb (%rdi,%rax), %al
+; KNL-NEXT: movb (%rsp,%rdi), %al
 ; KNL-NEXT: movq %rbp, %rsp
 ; KNL-NEXT: popq %rbp
 ; KNL-NEXT: vzeroupper
@@ -1496,8 +1492,7 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) {
 ; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
 ; SKX-NEXT: vmovaps %zmm0, (%rsp)
 ; SKX-NEXT: andl $63, %edi
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: movb (%rdi,%rax), %al
+; SKX-NEXT: movb (%rsp,%rdi), %al
 ; SKX-NEXT: movq %rbp, %rsp
 ; SKX-NEXT: popq %rbp
 ; SKX-NEXT: vzeroupper
@@ -1522,8 +1517,7 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
 ; KNL-NEXT: vmovaps %ymm0, (%rsp)
 ; KNL-NEXT: movzbl %dil, %eax
 ; KNL-NEXT: andl $63, %eax
-; KNL-NEXT: movq %rsp, %rcx
-; KNL-NEXT: movb (%rax,%rcx), %al
+; KNL-NEXT: movb (%rsp,%rax), %al
 ; KNL-NEXT: movq %rbp, %rsp
 ; KNL-NEXT: popq %rbp
 ; KNL-NEXT: vzeroupper
@@ -1542,8 +1536,7 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index)
 ; SKX-NEXT: vmovaps %zmm0, (%rsp)
 ; SKX-NEXT: movzbl %dil, %eax
 ; SKX-NEXT: andl $63, %eax
-; SKX-NEXT: movq %rsp, %rcx
-; SKX-NEXT: movb (%rax,%rcx), %al
+; SKX-NEXT: movb (%rsp,%rax), %al
 ; SKX-NEXT: movq %rbp, %rsp
 ; SKX-NEXT: popq %rbp
 ; SKX-NEXT: vzeroupper
@@ -1617,45 +1610,28 @@ define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b,
 define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b, i32 %index) {
 ; KNL-LABEL: test_extractelement_varible_v8i1:
 ; KNL: ## %bb.0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: movq %rsp, %rbp
-; KNL-NEXT: .cfi_def_cfa_register %rbp
-; KNL-NEXT: andq $-64, %rsp
-; KNL-NEXT: subq $128, %rsp
 ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
 ; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1
 ; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0
 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
-; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vmovdqa64 %zmm0, (%rsp)
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; KNL-NEXT: andl $7, %edi
-; KNL-NEXT: movzbl (%rsp,%rdi,8), %eax
+; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax
 ; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movq %rbp, %rsp
-; KNL-NEXT: popq %rbp
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: test_extractelement_varible_v8i1:
 ; SKX: ## %bb.0:
-; SKX-NEXT: pushq %rbp
-; SKX-NEXT: .cfi_def_cfa_offset 16
-; SKX-NEXT: .cfi_offset %rbp, -16
-; SKX-NEXT: movq %rsp, %rbp
-; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
 ; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
 ; SKX-NEXT: vpcmpnleud %ymm1, %ymm0, %k0
-; SKX-NEXT: vpmovm2q %k0, %zmm0
-; SKX-NEXT: vmovdqa64 %zmm0, (%rsp)
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT: andl $7, %edi
-; SKX-NEXT: movzbl (%rsp,%rdi,8), %eax
+; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax
 ; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: movq %rbp, %rsp
-; SKX-NEXT: popq %rbp
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 %t1 = icmp ugt <8 x i32> %a, %b
@@ -1667,43 +1643,26 @@ define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b,
 define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %b, i32 %index) {
 ; KNL-LABEL: test_extractelement_varible_v16i1:
 ; KNL: ## %bb.0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: movq %rsp, %rbp
-; KNL-NEXT: .cfi_def_cfa_register %rbp
-; KNL-NEXT: andq $-64, %rsp
-; KNL-NEXT: subq $128, %rsp
 ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi
 ; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1
 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vmovdqa32 %zmm0, (%rsp)
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; KNL-NEXT: andl $15, %edi
-; KNL-NEXT: movzbl (%rsp,%rdi,4), %eax
+; KNL-NEXT: movzbl -24(%rsp,%rdi), %eax
 ; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: movq %rbp, %rsp
-; KNL-NEXT: popq %rbp
 ; KNL-NEXT: vzeroupper
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: test_extractelement_varible_v16i1:
 ; SKX: ## %bb.0:
-; SKX-NEXT: pushq %rbp
-; SKX-NEXT: .cfi_def_cfa_offset 16
-; SKX-NEXT: .cfi_offset %rbp, -16
-; SKX-NEXT: movq %rsp, %rbp
-; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
 ; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
 ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
-; SKX-NEXT: vpmovm2d %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SKX-NEXT: andl $15, %edi
-; SKX-NEXT: movzbl (%rsp,%rdi,4), %eax
+; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax
 ; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: movq %rbp, %rsp
-; SKX-NEXT: popq %rbp
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 %t1 = icmp ugt <16 x i32> %a, %b
@@ -1729,8 +1688,7 @@ define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b,
 ; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
 ; KNL-NEXT: vmovdqa %ymm0, (%rsp)
 ; KNL-NEXT: andl $31, %edi
-; KNL-NEXT: movq %rsp, %rax
-; KNL-NEXT: movzbl (%rdi,%rax), %eax
+; KNL-NEXT: movzbl (%rsp,%rdi), %eax
 ; KNL-NEXT: andl $1, %eax
 ; KNL-NEXT: movq %rbp, %rsp
 ; KNL-NEXT: popq %rbp
@@ -1744,14 +1702,14 @@ define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b,
 ; SKX-NEXT: .cfi_offset %rbp, -16
 ; SKX-NEXT: movq %rsp, %rbp
 ; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
+; SKX-NEXT: andq $-32, %rsp
+; SKX-NEXT: subq $64, %rsp
 ; SKX-NEXT: ## kill: def %edi killed %edi def %rdi
 ; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
-; SKX-NEXT: vpmovm2w %k0, %zmm0
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: vmovdqa %ymm0, (%rsp)
 ; SKX-NEXT: andl $31, %edi
-; SKX-NEXT: movzbl (%rsp,%rdi,2), %eax
+; SKX-NEXT: movzbl (%rsp,%rdi), %eax
 ; SKX-NEXT: andl $1, %eax
 ; SKX-NEXT: movq %rbp, %rsp
 ; SKX-NEXT: popq %rbp
@@ -1792,8 +1750,7 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) {
 ; KNL-NEXT: andl $31, %esi
 ; KNL-NEXT: testb %dil, %dil
 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: setne (%rsi,%rax)
+; KNL-NEXT: setne 32(%rsp,%rsi)
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -1817,20 +1774,18 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) {
 ; SKX-NEXT: .cfi_offset %rbp, -16
 ; SKX-NEXT: movq %rsp, %rbp
 ; SKX-NEXT: .cfi_def_cfa_register %rbp
-; SKX-NEXT: andq $-64, %rsp
-; SKX-NEXT: subq $128, %rsp
+; SKX-NEXT: andq $-32, %rsp
+; SKX-NEXT: subq $64, %rsp
 ; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k1
-; SKX-NEXT: xorl %eax, %eax
-; SKX-NEXT: testb %dil, %dil
-; SKX-NEXT: setne %al
-; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
-; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
+; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
 ; SKX-NEXT: andl $31, %esi
-; SKX-NEXT: movw %ax, (%rsp,%rsi,2)
-; SKX-NEXT: vpsllw $15, (%rsp), %zmm0
-; SKX-NEXT: vpmovw2m %zmm0, %k0
+; SKX-NEXT: testb %dil, %dil
+; SKX-NEXT: vpmovm2b %k0, %ymm0
+; SKX-NEXT: vmovdqa %ymm0, (%rsp)
+; SKX-NEXT: setne (%rsp,%rsi)
+; SKX-NEXT: vpsllw $7, (%rsp), %ymm0
+; SKX-NEXT: vpmovb2m %ymm0, %k0
 ; SKX-NEXT: kmovd %k0, %eax
 ; SKX-NEXT: movq %rbp, %rsp
 ; SKX-NEXT: popq %rbp
@@ -1863,8 +1818,7 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
 ; KNL-NEXT: testb %dil, %dil
 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: setne (%rsi,%rax)
+; KNL-NEXT: setne 64(%rsp,%rsi)
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
@@ -1905,13 +1859,12 @@ define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) {
 ; SKX-NEXT: subq $128, %rsp
 ; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k1
+; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
 ; SKX-NEXT: andl $63, %esi
 ; SKX-NEXT: testb %dil, %dil
-; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT: vpmovm2b %k0, %zmm0
 ; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: setne (%rsi,%rax)
+; SKX-NEXT: setne (%rsp,%rsi)
 ; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
 ; SKX-NEXT: kmovq %k0, %rax
@@ -2050,8 +2003,7 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
-; KNL-NEXT: setne (%rax,%rcx)
+; KNL-NEXT: setne 128(%rsp,%rax)
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
@@ -2215,18 +2167,16 @@ define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) {
 ; SKX-NEXT: vpinsrb $15, 728(%rbp), %xmm2, %xmm2
 ; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k1
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k2
+; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k0
+; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k1
 ; SKX-NEXT: movl 744(%rbp), %eax
 ; SKX-NEXT: andl $127, %eax
 ; SKX-NEXT: cmpb $0, 736(%rbp)
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SKX-NEXT: vmovdqu8 %zmm0, %zmm1 {%k2} {z}
-; SKX-NEXT: vmovdqa32 %zmm1, {{[0-9]+}}(%rsp)
-; SKX-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: vpmovm2b %k1, %zmm0
+; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
+; SKX-NEXT: vpmovm2b %k0, %zmm0
 ; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rcx
-; SKX-NEXT: setne (%rax,%rcx)
+; SKX-NEXT: setne (%rsp,%rax)
 ; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
 ; SKX-NEXT: vpsllw $7, (%rsp), %zmm0
@@ -2270,8 +2220,7 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
 ; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
 ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: leaq {{[0-9]+}}(%rsp), %rax
-; KNL-NEXT: setne (%rsi,%rax)
+; KNL-NEXT: setne 128(%rsp,%rsi)
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2
 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3
@@ -2336,17 +2285,15 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
 ; SKX-NEXT: subq $256, %rsp ## imm = 0x100
 ; SKX-NEXT: ## kill: def %esi killed %esi def %rsi
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k1
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k2
+; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k0
+; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k1
 ; SKX-NEXT: andl $127, %esi
 ; SKX-NEXT: testb %dil, %dil
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; SKX-NEXT: vmovdqu8 %zmm0, %zmm1 {%k2} {z}
-; SKX-NEXT: vmovdqa32 %zmm1, {{[0-9]+}}(%rsp)
-; SKX-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: vpmovm2b %k1, %zmm0
+; SKX-NEXT: vmovdqa32 %zmm0, {{[0-9]+}}(%rsp)
+; SKX-NEXT: vpmovm2b %k0, %zmm0
 ; SKX-NEXT: vmovdqa32 %zmm0, (%rsp)
-; SKX-NEXT: movq %rsp, %rax
-; SKX-NEXT: setne (%rsi,%rax)
+; SKX-NEXT: setne (%rsp,%rsi)
 ; SKX-NEXT: vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
 ; SKX-NEXT: vpsllw $7, (%rsp), %zmm0